diff --git itests/hive-blobstore/src/test/results/clientpositive/map_join_on_filter.q.out itests/hive-blobstore/src/test/results/clientpositive/map_join_on_filter.q.out
index 653faab00a..087fa9417b 100644
--- itests/hive-blobstore/src/test/results/clientpositive/map_join_on_filter.q.out
+++ itests/hive-blobstore/src/test/results/clientpositive/map_join_on_filter.q.out
@@ -28,6 +28,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/smbbucket_1.txt' INTO
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@src_a_data
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT /*+ MAPJOIN(src1, src2) */ *
 FROM src_a_data src1
 RIGHT OUTER JOIN src_a_data src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10)
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 3510016c07..cfe83d6cb1 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -15,6 +15,7 @@ disabled.query.files=cbo_rp_subq_in.q,\
   sample2.q,\
   sample4.q,\
   root_dir_external_table.q,\
+  sort_acid.q,\
   input31.q
@@ -210,6 +211,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   selectDistinctStar.q,\
   select_dummy_source.q,\
   skewjoin.q,\
+  sort_acid.q,\
   stats_noscan_1.q,\
   stats_only_null.q,\
   subquery_exists.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
index 9c61b316e2..d9d338c824 100644
--- ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
+++ ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
@@ -45,6 +45,7 @@
   boolean hasOrderBy = false;
   boolean hasOuterOrderBy = false;
   boolean hasSortBy = false;
+  boolean hasLimit = false;
   boolean hasJoinFollowedByGroupBy = false;
   boolean hasPTF = false;
   boolean hasWindowing = false;
@@ -184,6 +185,14 @@ public void setHasSortBy(boolean hasSortBy) {
     this.hasSortBy = hasSortBy;
   }
 
+  public void setHasLimit(boolean hasLimit) {
+    this.hasLimit = hasLimit;
+  }
+
+  public boolean hasLimit() {
+    return hasLimit;
+  }
+
   public boolean hasJoinFollowedByGroupBy() {
     return hasJoinFollowedByGroupBy;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java
index e5f4c8492e..2caaba091f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.calcite.plan.RelMultipleTrait;
@@ -25,14 +26,23 @@
 import org.apache.calcite.plan.RelTraitDef;
 import org.apache.calcite.rel.RelDistribution;
 import org.apache.calcite.rel.RelDistributionTraitDef;
+import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.util.mapping.Mappings.TargetMapping;
 
 import com.google.common.collect.Ordering;
 
 public class HiveRelDistribution implements RelDistribution {
 
-  private static final Ordering<Iterable<Integer>> ORDERING =
-      Ordering.<Integer>natural().lexicographical();
+  private static final Ordering<Iterable<Integer>> ORDERING = Ordering.<Integer>natural().lexicographical();
+
+  public static HiveRelDistribution from(
+      List<RelFieldCollation> fieldCollations, RelDistribution.Type distributionType) {
+    List<Integer> newDistributionKeys = new ArrayList<>(fieldCollations.size());
+    for (RelFieldCollation fieldCollation : fieldCollations) {
+      newDistributionKeys.add(fieldCollation.getFieldIndex());
+    }
+    return new HiveRelDistribution(distributionType, newDistributionKeys);
+  }
 
   List<Integer> keys;
   RelDistribution.Type type;
@@ -70,7 +80,11 @@ public RelDistribution apply(TargetMapping mapping) {
     if (keys.isEmpty()) {
       return this;
     }
-    return new HiveRelDistribution(type, keys);
+    List<Integer> newKeys = new ArrayList<>(keys.size());
+    for (Integer key : keys) {
+      newKeys.add(mapping.getTargetOpt(key));
+    }
+    return new HiveRelDistribution(type, newKeys);
   }
 
   @Override
@@ -100,4 +114,12 @@ public int compareTo(RelMultipleTrait o) {
     return type.compareTo(distribution.getType());
   }
 
+  @Override
+  public String toString() {
+    if (keys.isEmpty()) {
+      return type.shortName;
+    } else {
+      return type.shortName + keys;
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index 04b3888a25..6be826e57a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -25,10 +25,12 @@
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelDistribution;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.CorrelationId;
 import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.core.RelFactories.AggregateFactory;
 import org.apache.calcite.rel.core.RelFactories.FilterFactory;
 import org.apache.calcite.rel.core.RelFactories.JoinFactory;
@@ -50,6 +52,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
 import com.google.common.collect.ImmutableList;
@@ -71,6 +74,9 @@
   public static final SortFactory HIVE_SORT_FACTORY =
       new HiveSortFactoryImpl();
 
+  public static final RelFactories.SortExchangeFactory HIVE_SORT_EXCHANGE_FACTORY =
+      new HiveSortExchangeFactoryImpl();
+
   public static final AggregateFactory HIVE_AGGREGATE_FACTORY =
       new HiveAggregateFactoryImpl();
 
@@ -85,6 +91,7 @@
           HIVE_JOIN_FACTORY,
           HIVE_SEMI_JOIN_FACTORY,
           HIVE_SORT_FACTORY,
+          HIVE_SORT_EXCHANGE_FACTORY,
           HIVE_AGGREGATE_FACTORY,
           HIVE_SET_OP_FACTORY));
 
@@ -195,6 +202,13 @@ public RelNode createSort(RelNode input, RelCollation collation, RexNode offset,
     }
   }
 
+  private static class HiveSortExchangeFactoryImpl implements RelFactories.SortExchangeFactory {
+    @Override
+    public RelNode createSortExchange(RelNode input, RelDistribution distribution, RelCollation collation) {
+      return HiveSortExchange.create(input, distribution, collation);
+    }
+  }
+
   private static class HiveAggregateFactoryImpl implements AggregateFactory {
     @Override
     public RelNode createAggregate(RelNode child,
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelJson.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelJson.java
new file mode 100644
index 0000000000..31c9a9c232
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelJson.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.calcite.rel.RelDistribution;
+import org.apache.calcite.rel.externalize.RelJson;
+import org.apache.calcite.util.JsonBuilder;
+
+/**
+ * Hive extension of RelJson.
+ * Implements JSON serialization of types which are not supported by Calcite 1.21.0.
+ * This class can be removed when Calcite is upgraded to 1.23.0.
+ */
+public class HiveRelJson extends RelJson {
+  private final JsonBuilder jsonBuilder;
+
+  public HiveRelJson(JsonBuilder jsonBuilder) {
+    super(jsonBuilder);
+    this.jsonBuilder = jsonBuilder;
+  }
+
+  @Override
+  public Object toJson(Object value) {
+    if (value instanceof RelDistribution) {
+      return toJson((RelDistribution) value);
+    }
+    return super.toJson(value);
+  }
+
+  // Upgrade to Calcite 1.23.0 to remove this method
+  private Object toJson(RelDistribution relDistribution) {
+    final Map<String, Object> map = jsonBuilder.map();
+    map.put("type", relDistribution.getType().name());
+
+    if (!relDistribution.getKeys().isEmpty()) {
+      List<Object> keys = new ArrayList<>(relDistribution.getKeys().size());
+      for (Integer key : relDistribution.getKeys()) {
+        keys.add(toJson(key));
+      }
+      map.put("keys", keys);
+    }
+    return map;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelJsonImpl.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelJsonImpl.java
index 0d45eb0c61..65877e3c93 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelJsonImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelJsonImpl.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
+import java.lang.reflect.Field;
 import java.util.List;
 import java.util.Map;
 import org.apache.calcite.rel.RelNode;
@@ -40,6 +41,15 @@
 
   public HiveRelJsonImpl() {
     super();
+
+    // Upgrade to Calcite 1.23.0 to remove this
+    try {
+      final Field fieldRelJson = RelJsonWriter.class.getDeclaredField("relJson");
+      fieldRelJson.setAccessible(true);
+      fieldRelJson.set(this, new HiveRelJson(jsonBuilder));
+    } catch (IllegalAccessException | NoSuchFieldException e) {
+      throw new RuntimeException(e);
+    }
   }
 
   //~ Methods ------------------------------------------------------------------
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index e647b88961..1e2c1e2f87 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -35,6 +35,9 @@
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptTable;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelDistribution;
+import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelReferentialConstraint;
 import org.apache.calcite.rel.core.Aggregate;
@@ -52,6 +55,7 @@
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCallBinding;
 import org.apache.calcite.rex.RexFieldAccess;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
@@ -61,10 +65,13 @@
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.validate.SqlMonotonicity;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.Pair;
+import org.apache.calcite.util.mapping.Mappings;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -1048,4 +1055,93 @@ public static String toJsonString(final RelNode rel) {
     return planWriter.asString();
   }
 
+  /**
+   * Map Sort and SortExchange keys to the specified Project columns.
+   * @param project the Project
+   * @param sortCollation current collation in Sort
+   * @param cluster RelOptCluster
+   * @return the new collation to be used in the Sort
+   */
+  public static List<RelFieldCollation> getNewRelFieldCollations(
+      HiveProject project, RelCollation sortCollation, RelOptCluster cluster) {
+    // Determine mapping between project input and output fields.
+    // In Hive, Sort is always based on RexInputRef
+    // HiveSort*PullUpConstantsRule should remove constants (RexLiteral)
+    // We only need to check if project can contain all the positions that sortCollation needs.
+    final Mappings.TargetMapping map =
+        RelOptUtil.permutationIgnoreCast(
+            project.getProjects(), project.getInput().getRowType()).inverse();
+    Set<Integer> needed = new HashSet<>();
+    for (RelFieldCollation fc : sortCollation.getFieldCollations()) {
+      needed.add(fc.getFieldIndex());
+      final RexNode node = project.getProjects().get(map.getTarget(fc.getFieldIndex()));
+      if (node.isA(SqlKind.CAST)) {
+        // Check whether it is a monotonic preserving cast, otherwise we cannot push
+        final RexCall cast = (RexCall) node;
+        final RexCallBinding binding =
+            RexCallBinding.create(cluster.getTypeFactory(), cast,
+                ImmutableList.of(RexUtil.apply(map, sortCollation)));
+        if (cast.getOperator().getMonotonicity(binding) == SqlMonotonicity.NOT_MONOTONIC) {
+          return null;
+        }
+      }
+    }
+    Map<Integer, Integer> m = new HashMap<>();
+    for (int projPos = 0; projPos < project.getChildExps().size(); projPos++) {
+      RexNode expr = project.getChildExps().get(projPos);
+      if (expr instanceof RexInputRef) {
+        Set<Integer> positions = HiveCalciteUtil.getInputRefs(expr);
+        if (positions.size() <= 1) {
+          int parentPos = positions.iterator().next();
+          if(needed.contains(parentPos)){
+            m.put(parentPos, projPos);
+            needed.remove(parentPos);
+          }
+        }
+      }
+    }
+    if(!needed.isEmpty()){
+      return null;
+    }
+
+    List<RelFieldCollation> fieldCollations = new ArrayList<>();
+    for (RelFieldCollation fc : sortCollation.getFieldCollations()) {
+      fieldCollations.add(new RelFieldCollation(m.get(fc.getFieldIndex()), fc.direction, fc.nullDirection));
+    }
+    return fieldCollations;
+  }
+
+  /**
+   * Map Exchange distribution keys to the specified Project columns.
+   * @param project the Project
+   * @param distribution current distribution in Exchange
+   * @return the new distribution to be used in the Exchange
+   */
+  public static List<Integer> getNewRelDistributionKeys(
+      HiveProject project, RelDistribution distribution) {
+    Set<Integer> needed = new HashSet<>(distribution.getKeys());
+    Map<Integer, Integer> m = new HashMap<>();
+    for (int projPos = 0; projPos < project.getChildExps().size(); projPos++) {
+      RexNode expr = project.getChildExps().get(projPos);
+      if (expr instanceof RexInputRef) {
+        Set<Integer> positions = HiveCalciteUtil.getInputRefs(expr);
+        if (positions.size() <= 1) {
+          int parentPos = positions.iterator().next();
+          if(needed.contains(parentPos)){
+            m.put(parentPos, projPos);
+            needed.remove(parentPos);
+          }
+        }
+      }
+    }
+    if(!needed.isEmpty()){
+      return null;
+    }
+
+    List<Integer> distributionKeys = new ArrayList<>();
+    for (Integer keyIndex : distribution.getKeys()) {
+      distributionKeys.add(m.get(keyIndex));
+    }
+    return distributionKeys;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java
index 880cae70f9..8c46e449d3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java
@@ -21,24 +21,31 @@
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationImpl;
 import org.apache.calcite.rel.RelCollationTraitDef;
 import org.apache.calcite.rel.RelDistribution;
 import org.apache.calcite.rel.RelDistributionTraitDef;
+import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.SortExchange;
 import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 
 import com.google.common.collect.ImmutableList;
 
-public class HiveSortExchange extends SortExchange {
-  private ImmutableList<RexNode> joinKeys;
-  private ExprNodeDesc[] joinExpressions;
+/**
+ * Hive extension of Calcite SortExchange.
+ * Adds support for keys used when sorting or joining.
+ */
+public final class HiveSortExchange extends SortExchange implements HiveRelNode {
+  private final ImmutableList<RexNode> keys;
+  private ExprNodeDesc[] keyExpressions;
 
   private HiveSortExchange(RelOptCluster cluster, RelTraitSet traitSet,
-      RelNode input, RelDistribution distribution, RelCollation collation, ImmutableList<RexNode> joinKeys) {
+      RelNode input, RelDistribution distribution, RelCollation collation, ImmutableList<RexNode> keys) {
     super(cluster, traitSet, input, distribution, collation);
-    this.joinKeys = new ImmutableList.Builder<RexNode>().addAll(joinKeys).build();
+    this.keys = new ImmutableList.Builder<RexNode>().addAll(keys).build();
   }
 
   /**
@@ -47,38 +54,59 @@ private HiveSortExchange(RelOptCluster cluster, RelTraitSet traitSet,
    * @param input Input relational expression
    * @param distribution Distribution specification
    * @param collation Collation specification
-   * @param joinKeys Join Keys specification
+   * @param keys Keys specification
    */
   public static HiveSortExchange create(RelNode input,
-      RelDistribution distribution, RelCollation collation, ImmutableList<RexNode> joinKeys) {
+      RelDistribution distribution, RelCollation collation, ImmutableList<RexNode> keys) {
     RelOptCluster cluster = input.getCluster();
     distribution = RelDistributionTraitDef.INSTANCE.canonize(distribution);
     collation = RelCollationTraitDef.INSTANCE.canonize(collation);
-    RelTraitSet traitSet = RelTraitSet.createEmpty().plus(distribution).plus(collation);
-    return new HiveSortExchange(cluster, traitSet, input, distribution, collation, joinKeys);
+    RelTraitSet traitSet = getTraitSet(collation, cluster);
+    return new HiveSortExchange(cluster, traitSet, input, distribution, collation, keys);
+  }
+
+  private static RelTraitSet getTraitSet(RelCollation collation, RelOptCluster cluster) {
+    // add distribution
+    return TraitsUtil.getDefaultTraitSet(cluster).replace(collation);
+  }
+
+  public static HiveSortExchange create(RelNode input,
+      RelDistribution distribution, RelCollation collation) {
+    RelOptCluster cluster = input.getCluster();
+    distribution = RelDistributionTraitDef.INSTANCE.canonize(distribution);
+    collation = RelCollationTraitDef.INSTANCE.canonize(collation);
+    RelTraitSet traitSet = getTraitSet(collation, cluster);
+    RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(collation.getFieldCollations()));
+
+    ImmutableList.Builder<RexNode> builder = ImmutableList.builder();
+    for (RelFieldCollation relFieldCollation : canonizedCollation.getFieldCollations()) {
+      int index = relFieldCollation.getFieldIndex();
+      builder.add(cluster.getRexBuilder().makeInputRef(input, index));
+    }
+
+    return new HiveSortExchange(cluster, traitSet, input, distribution, collation, builder.build());
   }
 
   @Override
   public SortExchange copy(RelTraitSet traitSet, RelNode newInput,
       RelDistribution newDistribution, RelCollation newCollation) {
     return new HiveSortExchange(getCluster(), traitSet, newInput,
-        newDistribution, newCollation, joinKeys);
+        newDistribution, newCollation, keys);
   }
 
-  public ImmutableList<RexNode> getJoinKeys() {
-    return joinKeys;
+  public ImmutableList<RexNode> getKeys() {
+    return keys;
   }
 
-  public void setJoinKeys(ImmutableList<RexNode> joinKeys) {
-    this.joinKeys = joinKeys;
+  public ExprNodeDesc[] getKeyExpressions() {
+    return keyExpressions;
   }
 
-  public ExprNodeDesc[] getJoinExpressions() {
-    return joinExpressions;
+  public void setKeyExpressions(ExprNodeDesc[] keyExpressions) {
+    this.keyExpressions = keyExpressions;
   }
 
-  public void setJoinExpressions(ExprNodeDesc[] joinExpressions) {
-    this.joinExpressions = joinExpressions;
+  @Override
+  public void implement(Implementor implementor) {
   }
-
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortExchangeTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortExchangeTransposeRule.java
new file mode 100644
index 0000000000..06cbe3da10
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortExchangeTransposeRule.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import static org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.getNewRelDistributionKeys;
+import static org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.getNewRelFieldCollations;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationImpl;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.SortExchange;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Push down Projection above SortExchange.
+ * HiveProject
+ *   HiveSortExchange
+ *     ...
+ *
+ * =>
+ *
+ * HiveSortExchange
+ *   HiveProject
+ *     ...
+ */
+public final class HiveProjectSortExchangeTransposeRule extends RelOptRule {
+  public static final HiveProjectSortExchangeTransposeRule INSTANCE = new HiveProjectSortExchangeTransposeRule();
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a HiveProjectSortExchangeTransposeRule.
+ */ + private HiveProjectSortExchangeTransposeRule() { + super( + operand( + HiveProject.class, + operand(HiveSortExchange.class, any()))); + } + + protected HiveProjectSortExchangeTransposeRule(RelOptRuleOperand operand) { + super(operand); + } + + //~ Methods ---------------------------------------------------------------- + + // implement RelOptRule + public void onMatch(RelOptRuleCall call) { + final HiveProject project = call.rel(0); + final HiveSortExchange sortExchange = call.rel(1); + final RelOptCluster cluster = project.getCluster(); + + List fieldCollations = getNewRelFieldCollations(project, sortExchange.getCollation(), cluster); + if (fieldCollations == null) { + return; + } + + RelTraitSet newTraitSet = TraitsUtil.getDefaultTraitSet(sortExchange.getCluster()); + RelCollation newCollation = newTraitSet.canonize(RelCollationImpl.of(fieldCollations)); + newTraitSet = newTraitSet.replace(newCollation); + List newDistributionKeys = getNewRelDistributionKeys(project, sortExchange.getDistribution()); + + // New operators + final RelNode newProject = project.copy(sortExchange.getInput().getTraitSet(), + ImmutableList.of(sortExchange.getInput())); + final SortExchange newSort = sortExchange.copy( + newTraitSet, + newProject, + new HiveRelDistribution(sortExchange.getDistribution().getType(), newDistributionKeys), + newCollation); + + call.transformTo(newSort); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java index 871c411e70..9ac9553440 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java @@ -17,32 +17,19 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import static org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.getNewRelFieldCollations; + import org.apache.calcite.plan.RelOptCluster; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Set; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptRuleOperand; -import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexCallBinding; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.validate.SqlMonotonicity; -import org.apache.calcite.util.mapping.Mappings; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; @@ -51,8 +38,7 @@ public class HiveProjectSortTransposeRule extends RelOptRule { - public static final HiveProjectSortTransposeRule INSTANCE = - new HiveProjectSortTransposeRule(); + public static final HiveProjectSortTransposeRule INSTANCE = new 
HiveProjectSortTransposeRule(); //~ Constructors ----------------------------------------------------------- @@ -77,64 +63,20 @@ public void onMatch(RelOptRuleCall call) { final HiveProject project = call.rel(0); final HiveSortLimit sort = call.rel(1); final RelOptCluster cluster = project.getCluster(); - - // Determine mapping between project input and output fields. - // In Hive, Sort is always based on RexInputRef - // We only need to check if project can contain all the positions that sort needs. - final Mappings.TargetMapping map = - RelOptUtil.permutationIgnoreCast( - project.getProjects(), project.getInput().getRowType()).inverse(); - Set needed = new HashSet<>(); - for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { - needed.add(fc.getFieldIndex()); - final RexNode node = project.getProjects().get(map.getTarget(fc.getFieldIndex())); - if (node.isA(SqlKind.CAST)) { - // Check whether it is a monotonic preserving cast, otherwise we cannot push - final RexCall cast = (RexCall) node; - final RexCallBinding binding = - RexCallBinding.create(cluster.getTypeFactory(), cast, - ImmutableList.of(RexUtil.apply(map, sort.getCollation()))); - if (cast.getOperator().getMonotonicity(binding) == SqlMonotonicity.NOT_MONOTONIC) { - return; - } - } - } - Map m = new HashMap<>(); - for (int projPos = 0; projPos < project.getChildExps().size(); projPos++) { - RexNode expr = project.getChildExps().get(projPos); - if (expr instanceof RexInputRef) { - Set positions = HiveCalciteUtil.getInputRefs(expr); - if (positions.size() > 1) { - continue; - } else { - int parentPos = positions.iterator().next(); - if(needed.contains(parentPos)){ - m.put(parentPos, projPos); - needed.remove(parentPos); - } - } - } - } - if(!needed.isEmpty()){ + List fieldCollations = getNewRelFieldCollations(project, sort.getCollation(), cluster); + if (fieldCollations == null) { return; } - - List fieldCollations = new ArrayList<>(); - for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { - fieldCollations.add(new RelFieldCollation(m.get(fc.getFieldIndex()), fc.direction, - fc.nullDirection)); - } RelTraitSet traitSet = sort.getCluster().traitSetOf(HiveRelNode.CONVENTION); RelCollation newCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations)); - + // New operators final RelNode newProject = project.copy(sort.getInput().getTraitSet(), - ImmutableList.of(sort.getInput())); + ImmutableList.of(sort.getInput())); final HiveSortLimit newSort = sort.copy(newProject.getTraitSet(), - newProject, newCollation, sort.offset, sort.fetch); + newProject, newCollation, sort.offset, sort.fetch); call.transformTo(newSort); } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index 53d68e872a..19ce3ea223 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -31,6 +31,9 @@ import org.apache.calcite.linq4j.Ord; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; @@ -67,6 +70,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; @@ -877,4 +881,48 @@ public static void initializeFieldTrimmerClass(List> no LOG.warn("Error initializing field trimmer instance", t); } } + + public TrimResult trimFields( + HiveSortExchange exchange, + ImmutableBitSet fieldsUsed, + Set extraFields) { + final RelDataType rowType = exchange.getRowType(); + final int fieldCount = rowType.getFieldCount(); + final RelCollation collation = exchange.getCollation(); + final RelDistribution distribution = exchange.getDistribution(); + final RelNode input = exchange.getInput(); + + // We use the fields used by the consumer, plus any fields used as exchange + // keys. + final ImmutableBitSet.Builder inputFieldsUsed = fieldsUsed.rebuild(); + for (RelFieldCollation field : collation.getFieldCollations()) { + inputFieldsUsed.set(field.getFieldIndex()); + } + for (int keyIndex : distribution.getKeys()) { + inputFieldsUsed.set(keyIndex); + } + + // Create input with trimmed columns. + final Set inputExtraFields = Collections.emptySet(); + TrimResult trimResult = + trimChild(exchange, input, inputFieldsUsed.build(), inputExtraFields); + RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + // If the input is unchanged, and we need to project all columns, + // there's nothing we can do. + if (newInput == input + && inputMapping.isIdentity() + && fieldsUsed.cardinality() == fieldCount) { + return result(exchange, Mappings.createIdentity(fieldCount)); + } + + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(newInput); + RelCollation newCollation = RexUtil.apply(inputMapping, collation); + RelDistribution newDistribution = distribution.apply(inputMapping); + relBuilder.sortExchange(newDistribution, newCollation); + + return result(relBuilder.build(), inputMapping); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java deleted file mode 100644 index e51b2b6ebc..0000000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.calcite.rules; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.calcite.plan.RelOptPredicateList; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.plan.hep.HepRelVertex; -import org.apache.calcite.rel.RelCollations; -import org.apache.calcite.rel.RelFieldCollation; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Sort; -import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexLiteral; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.tools.RelBuilder; -import org.apache.calcite.tools.RelBuilderFactory; -import org.apache.calcite.util.Pair; -import org.apache.calcite.util.mapping.Mappings; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.ImmutableList; - -/** - * Planner rule that pulls up constant keys through a SortLimit operator. - * - * This rule is only applied on SortLimit operators that are not the root - * of the plan tree. This is done because the interaction of this rule - * with the AST conversion may cause some optimizations to not kick in - * e.g. SimpleFetchOptimizer. Nevertheless, this will not have any - * performance impact in the resulting plans. - */ -public class HiveSortLimitPullUpConstantsRule extends RelOptRule { - - protected static final Logger LOG = LoggerFactory.getLogger(HiveSortLimitPullUpConstantsRule.class); - - - public static final HiveSortLimitPullUpConstantsRule INSTANCE = - new HiveSortLimitPullUpConstantsRule(HiveSortLimit.class, - HiveRelFactories.HIVE_BUILDER); - - private HiveSortLimitPullUpConstantsRule(Class sortClass, - RelBuilderFactory relBuilderFactory) { - super(operand(RelNode.class, unordered(operand(sortClass, any()))), relBuilderFactory, null); - } - - @Override - public void onMatch(RelOptRuleCall call) { - final RelNode parent = call.rel(0); - final Sort sort = call.rel(1); - - final int count = sort.getInput().getRowType().getFieldCount(); - if (count == 1) { - // No room for optimization since we cannot convert to an empty - // Project operator. - return; - } - - final RexBuilder rexBuilder = sort.getCluster().getRexBuilder(); - final RelMetadataQuery mq = call.getMetadataQuery(); - final RelOptPredicateList predicates = mq.getPulledUpPredicates(sort.getInput()); - if (predicates == null) { - return; - } - - Map conditionsExtracted = HiveReduceExpressionsRule.predicateConstants( - RexNode.class, rexBuilder, predicates); - Map constants = new HashMap<>(); - for (int i = 0; i < count ; i++) { - RexNode expr = rexBuilder.makeInputRef(sort.getInput(), i); - if (conditionsExtracted.containsKey(expr)) { - constants.put(expr, conditionsExtracted.get(expr)); - } - } - - // None of the expressions are constant. Nothing to do. - if (constants.isEmpty()) { - return; - } - - if (count == constants.size()) { - // At least a single item in project is required. 
- constants.remove(constants.keySet().iterator().next()); - } - - // Create expressions for Project operators before and after the Sort - List fields = sort.getInput().getRowType().getFieldList(); - List> newChildExprs = new ArrayList<>(); - List topChildExprs = new ArrayList<>(); - List topChildExprsFields = new ArrayList<>(); - for (int i = 0; i < count ; i++) { - RexNode expr = rexBuilder.makeInputRef(sort.getInput(), i); - RelDataTypeField field = fields.get(i); - if (constants.containsKey(expr)) { - topChildExprs.add(constants.get(expr)); - topChildExprsFields.add(field.getName()); - } else { - newChildExprs.add(Pair.of(expr, field.getName())); - topChildExprs.add(expr); - topChildExprsFields.add(field.getName()); - } - } - - // Update field collations - final Mappings.TargetMapping mapping = - RelOptUtil.permutation(Pair.left(newChildExprs), sort.getInput().getRowType()).inverse(); - List fieldCollations = new ArrayList<>(); - for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { - final int target = mapping.getTargetOpt(fc.getFieldIndex()); - if (target < 0) { - // It is a constant, we can ignore it - continue; - } - fieldCollations.add(fc.copy(target)); - } - - // Update top Project positions - topChildExprs = ImmutableList.copyOf(RexUtil.apply(mapping, topChildExprs)); - - // Create new Project-Sort-Project sequence - final RelBuilder relBuilder = call.builder(); - relBuilder.push(sort.getInput()); - relBuilder.project(Pair.left(newChildExprs), Pair.right(newChildExprs)); - final ImmutableList sortFields = - relBuilder.fields(RelCollations.of(fieldCollations)); - relBuilder.sortLimit(sort.offset == null ? -1 : RexLiteral.intValue(sort.offset), - sort.fetch == null ? -1 : RexLiteral.intValue(sort.fetch), sortFields); - // Create top Project fixing nullability of fields - relBuilder.project(topChildExprs, topChildExprsFields); - relBuilder.convert(sort.getRowType(), false); - - List inputs = new ArrayList<>(); - for (RelNode child : parent.getInputs()) { - if (!((HepRelVertex) child).getCurrentRel().equals(sort)) { - inputs.add(child); - } else { - inputs.add(relBuilder.build()); - } - } - call.transformTo(parent.copy(parent.getTraitSet(), inputs)); - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortPullUpConstantsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortPullUpConstantsRule.java new file mode 100644 index 0000000000..11fa86cc62 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortPullUpConstantsRule.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.ImmutableList; + +/** + * Planner rule that pulls up constant keys through a SortLimit or SortExchange operator. + * + * This rule is only applied on SortLimit operators that are not the root + * of the plan tree. This is done because the interaction of this rule + * with the AST conversion may cause some optimizations to not kick in + * e.g. SimpleFetchOptimizer. Nevertheless, this will not have any + * performance impact in the resulting plans. + */ +public final class HiveSortPullUpConstantsRule { + + public static final HiveSortPullUpConstantsRuleBase SORT_LIMIT_INSTANCE = + new HiveSortLimitPullUpConstantsRule(); + + private static final class HiveSortLimitPullUpConstantsRule + extends HiveSortPullUpConstantsRuleBase { + + protected HiveSortLimitPullUpConstantsRule() { + super(HiveSortLimit.class); + } + + @Override + protected void buildSort(RelBuilder relBuilder, HiveSortLimit sortNode, Mappings.TargetMapping mapping) { + List fieldCollations = applyToFieldCollations(sortNode.getCollation(), mapping); + final ImmutableList sortFields = + relBuilder.fields(RelCollations.of(fieldCollations)); + relBuilder.sortLimit(sortNode.offset == null ? -1 : RexLiteral.intValue(sortNode.offset), + sortNode.fetch == null ? 
-1 : RexLiteral.intValue(sortNode.fetch), sortFields); + } + } + + public static final HiveSortExchangePullUpConstantsRule SORT_EXCHANGE_INSTANCE = + new HiveSortExchangePullUpConstantsRule(); + + private static final class HiveSortExchangePullUpConstantsRule + extends HiveSortPullUpConstantsRuleBase { + + protected HiveSortExchangePullUpConstantsRule() { + super(HiveSortExchange.class); + } + + @Override + protected void buildSort(RelBuilder relBuilder, HiveSortExchange sortNode, Mappings.TargetMapping mapping) { + List fieldCollations = applyToFieldCollations(sortNode.getCollation(), mapping); + RelDistribution distribution = sortNode.getDistribution().apply(mapping); + relBuilder.sortExchange(distribution, RelCollations.of(fieldCollations)); + } + } + + + public static abstract class HiveSortPullUpConstantsRuleBase extends RelOptRule { + + protected HiveSortPullUpConstantsRuleBase(Class sortClass) { + super(operand(RelNode.class, unordered(operand(sortClass, any()))), HiveRelFactories.HIVE_BUILDER, null); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final RelNode parent = call.rel(0); + final T sortNode = call.rel(1); + + final int count = sortNode.getInput().getRowType().getFieldCount(); + if (count == 1) { + // No room for optimization since we cannot convert to an empty + // Project operator. + return; + } + + final RexBuilder rexBuilder = sortNode.getCluster().getRexBuilder(); + final RelMetadataQuery mq = call.getMetadataQuery(); + final RelOptPredicateList predicates = mq.getPulledUpPredicates(sortNode.getInput()); + if (predicates == null) { + return; + } + + Map conditionsExtracted = HiveReduceExpressionsRule.predicateConstants( + RexNode.class, rexBuilder, predicates); + Map constants = new HashMap<>(); + for (int i = 0; i < count; i++) { + RexNode expr = rexBuilder.makeInputRef(sortNode.getInput(), i); + if (conditionsExtracted.containsKey(expr)) { + constants.put(expr, conditionsExtracted.get(expr)); + } + } + + // None of the expressions are constant. Nothing to do. + if (constants.isEmpty()) { + return; + } + + if (count == constants.size()) { + // At least a single item in project is required. 
+ constants.remove(constants.keySet().iterator().next()); + } + + // Create expressions for Project operators before and after the Sort + List fields = sortNode.getInput().getRowType().getFieldList(); + List> newChildExprs = new ArrayList<>(); + List topChildExprs = new ArrayList<>(); + List topChildExprsFields = new ArrayList<>(); + for (int i = 0; i < count; i++) { + RexNode expr = rexBuilder.makeInputRef(sortNode.getInput(), i); + RelDataTypeField field = fields.get(i); + if (constants.containsKey(expr)) { + topChildExprs.add(constants.get(expr)); + topChildExprsFields.add(field.getName()); + } else { + newChildExprs.add(Pair.of(expr, field.getName())); + topChildExprs.add(expr); + topChildExprsFields.add(field.getName()); + } + } + + // Update field collations + final Mappings.TargetMapping mapping = + RelOptUtil.permutation(Pair.left(newChildExprs), sortNode.getInput().getRowType()).inverse(); + + // Update top Project positions + topChildExprs = ImmutableList.copyOf(RexUtil.apply(mapping, topChildExprs)); + + // Create new Project-Sort-Project sequence + final RelBuilder relBuilder = call.builder(); + relBuilder.push(sortNode.getInput()); + relBuilder.project(Pair.left(newChildExprs), Pair.right(newChildExprs)); + buildSort(relBuilder, sortNode, mapping); + // Create top Project fixing nullability of fields + relBuilder.project(topChildExprs, topChildExprsFields); + relBuilder.convert(sortNode.getRowType(), false); + + List inputs = new ArrayList<>(); + for (RelNode child : parent.getInputs()) { + if (!((HepRelVertex) child).getCurrentRel().equals(sortNode)) { + inputs.add(child); + } else { + inputs.add(relBuilder.build()); + } + } + call.transformTo(parent.copy(parent.getTraitSet(), inputs)); + } + + protected List applyToFieldCollations( + RelCollation relCollation, Mappings.TargetMapping mapping) { + List fieldCollations = new ArrayList<>(); + for (RelFieldCollation fc : relCollation.getFieldCollations()) { + final int target = mapping.getTargetOpt(fc.getFieldIndex()); + if (target < 0) { + // It is a constant, we can ignore it + continue; + } + fieldCollations.add(fc.copy(target)); + } + return fieldCollations; + } + + protected abstract void buildSort(RelBuilder relBuilder, T sortNode, Mappings.TargetMapping mapping); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index e03e96ff12..ee21a1eeb2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -32,6 +32,7 @@ import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Aggregate.Group; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Exchange; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; @@ -62,6 +63,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; @@ -88,7 +90,7 @@ private Aggregate groupBy; private Filter having; private RelNode select; - private Sort orderLimit; + private RelNode orderLimit; private Schema schema; @@ -255,7 +257,7 @@ private ASTNode convert() throws CalciteSemanticException { * its parent. * 8. Limit */ - convertOrderLimitToASTNode((HiveSortLimit) orderLimit); + convertOrderToASTNode(orderLimit); return hiveAST.getAST(); } @@ -275,75 +277,98 @@ private ASTNode buildUDTFAST(String functionName, List children) { return node; } - private void convertOrderLimitToASTNode(HiveSortLimit order) { - if (order != null) { - HiveSortLimit hiveSortLimit = order; - if (!hiveSortLimit.getCollation().getFieldCollations().isEmpty()) { - // 1 Add order by token - ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - - schema = new Schema(hiveSortLimit); - Map obRefToCallMap = hiveSortLimit.getInputRefToCallMap(); - RexNode obExpr; - ASTNode astCol; - for (RelFieldCollation c : hiveSortLimit.getCollation().getFieldCollations()) { - - // 2 Add Direction token - ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder - .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder - .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); - ASTNode nullDirectionAST; - // Null direction - if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) { - nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); - directionAST.addChild(nullDirectionAST); - } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) { - nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); - directionAST.addChild(nullDirectionAST); - } else { - // Default - if (c.getDirection() == RelFieldCollation.Direction.ASCENDING) { - nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); - directionAST.addChild(nullDirectionAST); - } else { - nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); - directionAST.addChild(nullDirectionAST); - } - } + private void convertOrderToASTNode(RelNode node) { + if (node == null) { + return; + } - // 3 Convert OB expr (OB Expr is usually an input ref except for top - // level OB; top level OB will have RexCall kept in a map.) - obExpr = null; - if (obRefToCallMap != null) { - obExpr = obRefToCallMap.get(c.getFieldIndex()); - } + if (node instanceof HiveSortLimit) { + convertOrderLimitToASTNode((HiveSortLimit) node); + } else if (node instanceof HiveSortExchange) { + convertSortToASTNode((HiveSortExchange) node); + } + } - if (obExpr != null) { - astCol = obExpr.accept(new RexVisitor(schema, false, order.getCluster().getRexBuilder())); - } else { - ColumnInfo cI = schema.get(c.getFieldIndex()); - /* - * The RowResolver setup for Select drops Table associations. So - * setup ASTNode on unqualified name. 
- */ - astCol = ASTBuilder.unqualifiedName(cI.column); - } + private void convertOrderLimitToASTNode(HiveSortLimit hiveSortLimit) { + List fieldCollations = hiveSortLimit.getCollation().getFieldCollations(); + convertFieldCollationsToASTNode(hiveSortLimit, new Schema(hiveSortLimit), fieldCollations, + hiveSortLimit.getInputRefToCallMap(), HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - // 4 buildup the ob expr AST - nullDirectionAST.addChild(astCol); - orderAst.addChild(directionAST); + RexNode offsetExpr = hiveSortLimit.getOffsetExpr(); + RexNode fetchExpr = hiveSortLimit.getFetchExpr(); + if (fetchExpr != null) { + Object offset = (offsetExpr == null) ? Integer.valueOf(0) : ((RexLiteral) offsetExpr).getValue2(); + Object fetch = ((RexLiteral) fetchExpr).getValue2(); + hiveAST.limit = ASTBuilder.limit(offset, fetch); + } + } + + private void convertSortToASTNode(HiveSortExchange hiveSortExchange) { + List fieldCollations = hiveSortExchange.getCollation().getFieldCollations(); + convertFieldCollationsToASTNode(hiveSortExchange, new Schema(hiveSortExchange), fieldCollations, + null, HiveParser.TOK_SORTBY, "TOK_SORTBY"); + } + + private void convertFieldCollationsToASTNode( + RelNode node, Schema schema, List fieldCollations, Map obRefToCallMap, + int astToken, String astText) { + if (fieldCollations.isEmpty()) { + return; + } + + // 1 Add order/sort by token + ASTNode orderAst = ASTBuilder.createAST(astToken, astText); + + RexNode obExpr; + ASTNode astCol; + for (RelFieldCollation c : fieldCollations) { + + // 2 Add Direction token + ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder + .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder + .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); + ASTNode nullDirectionAST; + // Null direction + if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); + directionAST.addChild(nullDirectionAST); + } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); + directionAST.addChild(nullDirectionAST); + } else { + // Default + if (c.getDirection() == RelFieldCollation.Direction.ASCENDING) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); + directionAST.addChild(nullDirectionAST); + } else { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); + directionAST.addChild(nullDirectionAST); } - hiveAST.order = orderAst; } - RexNode offsetExpr = hiveSortLimit.getOffsetExpr(); - RexNode fetchExpr = hiveSortLimit.getFetchExpr(); - if (fetchExpr != null) { - Object offset = (offsetExpr == null) ? Integer.valueOf(0) : ((RexLiteral) offsetExpr).getValue2(); - Object fetch = ((RexLiteral) fetchExpr).getValue2(); - hiveAST.limit = ASTBuilder.limit(offset, fetch); + // 3 Convert OB expr (OB Expr is usually an input ref except for top + // level OB; top level OB will have RexCall kept in a map.) + obExpr = null; + if (obRefToCallMap != null) { + obExpr = obRefToCallMap.get(c.getFieldIndex()); } + + if (obExpr != null) { + astCol = obExpr.accept(new RexVisitor(schema, false, node.getCluster().getRexBuilder())); + } else { + ColumnInfo cI = schema.get(c.getFieldIndex()); + /* + * The RowResolver setup for Select drops Table associations. So + * setup ASTNode on unqualified name. 
+ */ + astCol = ASTBuilder.unqualifiedName(cI.column); + } + + // 4 buildup the ob expr AST + nullDirectionAST.addChild(astCol); + orderAst.addChild(directionAST); } + hiveAST.order = orderAst; } private Schema getRowSchema(String tblAlias) { @@ -463,11 +488,11 @@ public void visit(RelNode node, int ordinal, RelNode parent) { ASTConverter.this.from = node; } else if (node instanceof Aggregate) { ASTConverter.this.groupBy = (Aggregate) node; - } else if (node instanceof Sort) { + } else if (node instanceof Sort || node instanceof Exchange) { if (ASTConverter.this.select != null) { ASTConverter.this.from = node; } else { - ASTConverter.this.orderLimit = (Sort) node; + ASTConverter.this.orderLimit = node; } } /* @@ -872,11 +897,12 @@ public QueryBlockInfo(Schema schema, ASTNode ast) { * Hive Sort Node * @return Schema */ - public Schema(HiveSortLimit order) { - Project select = (Project) order.getInput(); - for (String projName : select.getRowType().getFieldNames()) { - add(new ColumnInfo(null, projName)); - } + Schema(HiveSortLimit order) { + this((Project) order.getInput(), null); + } + + Schema(HiveSortExchange sort) { + this((Project) sort.getInput(), null); } public Schema(String tabAlias, List fieldList) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java index 31619c0314..fe4ecf0aba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java @@ -28,6 +28,7 @@ import org.apache.calcite.rel.SingleRel; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Exchange; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.Project; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; @@ -70,7 +72,7 @@ public static RelNode convertOpTree(RelNode rel, List resultSchema, LOG.debug("Original plan for PlanModifier\n " + RelOptUtil.toString(newTopNode)); } - if (!(newTopNode instanceof Project) && !(newTopNode instanceof Sort)) { + if (!(newTopNode instanceof Project) && !(newTopNode instanceof Sort) && !(newTopNode instanceof Exchange)) { newTopNode = introduceDerivedTable(newTopNode); if (LOG.isDebugEnabled()) { LOG.debug("Plan after top-level introduceDerivedTable\n " @@ -174,6 +176,10 @@ private static void convertOpTree(RelNode rel, RelNode parent) { if (!validSortChild((HiveSortLimit) rel)) { introduceDerivedTable(((HiveSortLimit) rel).getInput(), rel); } + } else if (rel instanceof HiveSortExchange) { + if (!validExchangeChild((HiveSortExchange) rel)) { + introduceDerivedTable(((HiveSortExchange) rel).getInput(), rel); + } } else if (rel instanceof HiveAggregate) { RelNode newParent = parent; if (!validGBParent(rel, parent)) { @@ -358,6 +364,10 @@ private static 
boolean validSortChild(HiveSortLimit sortNode) { return validChild; } + private static boolean validExchangeChild(HiveSortExchange sortNode) { + return sortNode.getInput() instanceof Project; + } + private static boolean validSetopParent(RelNode setop, RelNode parent) { boolean validChild = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/HiveSortExchangeVisitor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/HiveSortExchangeVisitor.java index 68227db1ee..8a784c8727 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/HiveSortExchangeVisitor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/HiveSortExchangeVisitor.java @@ -53,12 +53,12 @@ OpAttr visit(HiveSortExchange exchangeRel) throws SemanticException { if (distribution.getType() != Type.HASH_DISTRIBUTED) { throw new SemanticException("Only hash distribution supported for LogicalExchange"); } - ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getJoinKeys().size()]; - for (int index = 0; index < exchangeRel.getJoinKeys().size(); index++) { - expressions[index] = HiveOpConverterUtils.convertToExprNode(exchangeRel.getJoinKeys().get(index), + ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getKeys().size()]; + for (int index = 0; index < exchangeRel.getKeys().size(); index++) { + expressions[index] = HiveOpConverterUtils.convertToExprNode(exchangeRel.getKeys().get(index), exchangeRel.getInput(), inputOpAf.tabAlias, inputOpAf.vcolsInCalcite); } - exchangeRel.setJoinExpressions(expressions); + exchangeRel.setKeyExpressions(expressions); ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), tabAlias, expressions, -1, -1, Operation.NOT_ACID, hiveOpConverter.getHiveConf()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/JoinVisitor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/JoinVisitor.java index 0286d54ea0..8d9d5aedb5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/JoinVisitor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/JoinVisitor.java @@ -103,7 +103,7 @@ OpAttr visit(RelNode joinRel) throws SemanticException { // 4. Extract join key expressions from HiveSortExchange ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][]; for (int i = 0; i < inputs.length; i++) { - joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getJoinExpressions(); + joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getKeyExpressions(); } // 5. Extract rest of join predicate info. 
We infer the rest of join condition diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 6589eeb39b..9c39cb20bb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -70,6 +70,8 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistributions; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelVisitor; @@ -203,6 +205,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectOverIntersectRemoveRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortExchangeTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsWithStatsRule; @@ -213,10 +216,10 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule; @@ -240,7 +243,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; -import org.apache.hadoop.hive.ql.parse.type.JoinCondTypeCheckProcFactory; import org.apache.hadoop.hive.ql.parse.type.JoinTypeCheckCtx; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.PlanModifierForReturnPath; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; @@ -951,7 +953,8 @@ private static String canHandleQbForCbo(QueryProperties queryProperties, HiveCon boolean topLevelQB, boolean verbose) { if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy() - && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript() + && !(queryProperties.hasSortBy() && queryProperties.hasLimit()) + && !queryProperties.hasPTF() && !queryProperties.usesScript() && queryProperties.isCBOSupportedLateralViews()) { // Ok to run CBO. 
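The relaxed guard above means a bare SORT BY no longer disables CBO — it is now planned as a HiveSortExchange — while SORT BY combined with LIMIT still falls back to the non-CBO path. A minimal sketch of that predicate in isolation, using the hasSortBy()/hasLimit() accessors from this patch; the wrapper class name is illustrative only and not part of the patch:

import org.apache.hadoop.hive.ql.QueryProperties;

final class CboSortByGuardSketch {
  // True when the SORT BY clause forces a fall-back from CBO:
  // only the SORT BY + LIMIT combination remains unsupported.
  static boolean sortByBlocksCbo(QueryProperties qp) {
    return qp.hasSortBy() && qp.hasLimit();
  }
}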
return null; @@ -966,8 +969,8 @@ private static String canHandleQbForCbo(QueryProperties queryProperties, HiveCon if (queryProperties.hasDistributeBy()) { msg += "has distribute by; "; } - if (queryProperties.hasSortBy()) { - msg += "has sort by; "; + if (queryProperties.hasSortBy() && queryProperties.hasLimit()) { + msg += "has sort by with limit; "; } if (queryProperties.hasPTF()) { msg += "has PTF; "; @@ -2012,7 +2015,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN); rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN); rules.add(HiveSortMergeRule.INSTANCE); - rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE); + rules.add(HiveSortPullUpConstantsRule.SORT_LIMIT_INSTANCE); + rules.add(HiveSortPullUpConstantsRule.SORT_EXCHANGE_INSTANCE); rules.add(HiveUnionPullUpConstantsRule.INSTANCE); rules.add(HiveAggregatePullUpConstantsRule.INSTANCE); generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP, @@ -2435,6 +2439,9 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv // 9.2. Introduce exchange operators below join/multijoin operators generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); + } else { + generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, + HiveProjectSortExchangeTransposeRule.INSTANCE, HiveProjectMergeRule.INSTANCE); } // Trigger program @@ -3949,217 +3956,279 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException * @throws SemanticException */ private RelNode genOBLogicalPlan(QB qb, Pair selPair, - boolean outermostOB) throws SemanticException { - // selPair.getKey() is the operator right before OB - // selPair.getValue() is RR which only contains columns needed in result - // set. Extra columns needed by order by will be absent from it. - RelNode srcRel = selPair.getKey(); - RowResolver selectOutputRR = selPair.getValue(); - RelNode sortRel = null; - RelNode returnRel = null; + boolean outermostOB) throws SemanticException { QBParseInfo qbp = getQBParseInfo(qb); String dest = qbp.getClauseNames().iterator().next(); ASTNode obAST = qbp.getOrderByForClause(dest); - if (obAST != null) { - // 1. OB Expr sanity test - // in strict mode, in the presence of order by, limit must be - // specified - Integer limit = qb.getParseInfo().getDestLimit(dest); - if (limit == null) { - String error = StrictChecks.checkNoLimit(conf); - if (error != null) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error)); - } + if (obAST == null) { + return null; + } + + // 1. OB Expr sanity test + // in strict mode, in the presence of order by, limit must be + // specified + Integer limit = qb.getParseInfo().getDestLimit(dest); + if (limit == null) { + String error = StrictChecks.checkNoLimit(conf); + if (error != null) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error)); } + } - // 2. 
Walk through OB exprs and extract field collations and additional - // virtual columns needed - final List newVCLst = new ArrayList(); - final List fieldCollations = Lists.newArrayList(); - int fieldIndex = 0; + OBLogicalPlanGenState obLogicalPlanGenState = beginGenOBLogicalPlan(obAST, selPair, outermostOB); - List obASTExprLst = obAST.getChildren(); - ASTNode obASTExpr; - ASTNode nullObASTExpr; - List> vcASTTypePairs = new ArrayList>(); - RowResolver inputRR = relToHiveRR.get(srcRel); - RowResolver outputRR = new RowResolver(); - - RexNode rnd; - RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), - relToHiveColNameCalcitePosMap.get(srcRel), 0, false); - int srcRelRecordSz = srcRel.getRowType().getFieldCount(); - - for (int i = 0; i < obASTExprLst.size(); i++) { - // 2.1 Convert AST Expr to ExprNode - obASTExpr = (ASTNode) obASTExprLst.get(i); - nullObASTExpr = (ASTNode) obASTExpr.getChild(0); - ASTNode ref = (ASTNode) nullObASTExpr.getChild(0); - Map astToExprNDescMap = null; - ExprNodeDesc obExprNDesc = null; - - boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); - boolean isObyByPos = isBothByPos - || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS); - // replace each of the position alias in ORDERBY with the actual column - if (ref != null && ref.getToken().getType() == HiveParser.Number) { - if (isObyByPos) { - int pos = Integer.parseInt(ref.getText()); - if (pos > 0 && pos <= selectOutputRR.getColumnInfos().size()) { - // fieldIndex becomes so simple - // Note that pos starts from 1 while fieldIndex starts from 0; - fieldIndex = pos - 1; - } else { - throw new SemanticException( - ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg("Position alias: " + pos - + " does not exist\n" + "The Select List is indexed from 1 to " - + selectOutputRR.getColumnInfos().size())); - } - } else { // if not using position alias and it is a number. - LOG.warn("Using constant number " - + ref.getText() - + " in order by. If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored."); - } - } else { - // first try to get it from select - // in case of udtf, selectOutputRR may be null. - if (selectOutputRR != null) { - try { - astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR); - obExprNDesc = astToExprNDescMap.get(ref); - } catch (SemanticException ex) { - // we can tolerate this as this is the previous behavior - LOG.debug("Can not find column in " + ref.getText() + ". The error msg is " - + ex.getMessage()); - } - } - // then try to get it from all - if (obExprNDesc == null) { - astToExprNDescMap = genAllExprNodeDesc(ref, inputRR); - obExprNDesc = astToExprNDescMap.get(ref); - } - if (obExprNDesc == null) { - throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); - } - // 2.2 Convert ExprNode to RexNode - rnd = converter.convert(obExprNDesc); - - // 2.3 Determine the index of ob expr in child schema - // NOTE: Calcite can not take compound exprs in OB without it being - // present in the child (& hence we add a child Project Rel) - if (rnd instanceof RexInputRef) { - fieldIndex = ((RexInputRef) rnd).getIndex(); - } else { - fieldIndex = srcRelRecordSz + newVCLst.size(); - newVCLst.add(rnd); - vcASTTypePairs.add(new Pair(ref, obExprNDesc.getTypeInfo())); - } - } + // 4. 
Construct SortRel + RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize( + RelCollationImpl.of(obLogicalPlanGenState.getFieldCollation())); + RelNode sortRel = new HiveSortLimit( + cluster, traitSet, obLogicalPlanGenState.getObInputRel(), canonizedCollation, null, null); + + return endGenOBLogicalPlan(obLogicalPlanGenState, sortRel); + } + + private RelNode genSBLogicalPlan(QB qb, Pair selPair, + boolean outermostOB) throws SemanticException { + + QBParseInfo qbp = getQBParseInfo(qb); + String dest = qbp.getClauseNames().iterator().next(); + ASTNode sbAST = qbp.getSortByForClause(dest); - // 2.4 Determine the Direction of order by - RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; - if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { - order = RelFieldCollation.Direction.ASCENDING; + if (sbAST == null) { + return null; + } + + OBLogicalPlanGenState obLogicalPlanGenState = beginGenOBLogicalPlan(sbAST, selPair, outermostOB); + + // 4. Construct SortRel + RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); + RelCollation canonizedCollation = + traitSet.canonize(RelCollationImpl.of(obLogicalPlanGenState.getFieldCollation())); + List joinKeyPositions = new ArrayList<>(canonizedCollation.getFieldCollations().size()); + ImmutableList.Builder builder = ImmutableList.builder(); + for (RelFieldCollation relFieldCollation : canonizedCollation.getFieldCollations()) { + int index = relFieldCollation.getFieldIndex(); + joinKeyPositions.add(index); + builder.add(cluster.getRexBuilder().makeInputRef(obLogicalPlanGenState.getObInputRel(), index)); + } + + RelNode sortRel = HiveSortExchange.create( + obLogicalPlanGenState.getObInputRel(), + // In case of SORT BY we do not need Distribution + // but the instance RelDistributions.ANY can not be used here because + // org.apache.calcite.rel.core.Exchange has + // assert distribution != RelDistributions.ANY; + new HiveRelDistribution(RelDistribution.Type.ANY, RelDistributions.ANY.getKeys()), + canonizedCollation, + builder.build()); + + return endGenOBLogicalPlan(obLogicalPlanGenState, sortRel); + } + + // - Walk through OB exprs and extract field collations and additional virtual columns needed + // - Add Child Project Rel if needed, + // - Generate Output RR, input Sel Rel for top constraining Sel + private OBLogicalPlanGenState beginGenOBLogicalPlan( + ASTNode obAST, Pair selPair, boolean outermostOB) throws SemanticException { + // selPair.getKey() is the operator right before OB + // selPair.getValue() is RR which only contains columns needed in result + // set. Extra columns needed by order by will be absent from it. + RelNode srcRel = selPair.getKey(); + RowResolver selectOutputRR = selPair.getValue(); + + // 2. 
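Note on the genSBLogicalPlan construction above: org.apache.calcite.rel.core.Exchange asserts distribution != RelDistributions.ANY, so the shared ANY singleton cannot be handed to HiveSortExchange.create; the patch instead builds a distinct HiveRelDistribution of type ANY. A minimal standalone sketch of that workaround, assuming the public HiveRelDistribution(type, keys) constructor used elsewhere in this patch (class and method names below are illustrative only):

import org.apache.calcite.rel.RelDistribution;
import org.apache.calcite.rel.RelDistributions;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution;

final class SortByDistributionSketch {
  // A fresh Type.ANY instance expresses "no particular distribution" for SORT BY,
  // and since it is not the RelDistributions.ANY singleton it satisfies Exchange's
  // `assert distribution != RelDistributions.ANY`.
  static RelDistribution anyDistributionForSortBy() {
    return new HiveRelDistribution(RelDistribution.Type.ANY, RelDistributions.ANY.getKeys());
  }
}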
Walk through OB exprs and extract field collations and additional + // virtual columns needed + final List newVCLst = new ArrayList<>(); + final List fieldCollations = Lists.newArrayList(); + int fieldIndex = 0; + + List obASTExprLst = obAST.getChildren(); + List> vcASTTypePairs = new ArrayList<>(); + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outputRR = new RowResolver(); + + RexNode rnd; + RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), + relToHiveColNameCalcitePosMap.get(srcRel), 0, false); + int srcRelRecordSz = srcRel.getRowType().getFieldCount(); + + for (int i = 0; i < obASTExprLst.size(); i++) { + // 2.1 Convert AST Expr to ExprNode + ASTNode obASTExpr = (ASTNode) obASTExprLst.get(i); + ASTNode nullObASTExpr = (ASTNode) obASTExpr.getChild(0); + ASTNode ref = (ASTNode) nullObASTExpr.getChild(0); + + boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); + boolean isObyByPos = isBothByPos + || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS); + // replace each of the position alias in ORDERBY with the actual column + if (ref != null && ref.getToken().getType() == HiveParser.Number) { + if (isObyByPos) { + fieldIndex = getFieldIndexFromColumnNumber(selectOutputRR, ref); + } else { // if not using position alias and it is a number. + LOG.warn("Using constant number " + + ref.getText() + + " in order by. If you try to use position alias when hive.orderby.position.alias is false, " + + "the position alias will be ignored."); } - RelFieldCollation.NullDirection nullOrder; - if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_FIRST) { - nullOrder = RelFieldCollation.NullDirection.FIRST; - } else if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_LAST) { - nullOrder = RelFieldCollation.NullDirection.LAST; + } else { + ExprNodeDesc obExprNDesc = getOrderByExprNodeDesc(selectOutputRR, inputRR, obASTExpr, ref); + // 2.2 Convert ExprNode to RexNode + rnd = converter.convert(obExprNDesc); + + // 2.3 Determine the index of ob expr in child schema + // NOTE: Calcite can not take compound exprs in OB without it being + // present in the child (& hence we add a child Project Rel) + if (rnd instanceof RexInputRef) { + fieldIndex = ((RexInputRef) rnd).getIndex(); } else { - throw new SemanticException("Unexpected null ordering option: " - + nullObASTExpr.getType()); + fieldIndex = srcRelRecordSz + newVCLst.size(); + newVCLst.add(rnd); + vcASTTypePairs.add(new Pair<>(ref, obExprNDesc.getTypeInfo())); } + } - // 2.5 Add to field collations - fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder)); + // 2.4 Determine the Direction of order by + RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; + if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { + order = RelFieldCollation.Direction.ASCENDING; + } + RelFieldCollation.NullDirection nullOrder; + if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_FIRST) { + nullOrder = RelFieldCollation.NullDirection.FIRST; + } else if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_LAST) { + nullOrder = RelFieldCollation.NullDirection.LAST; + } else { + throw new SemanticException("Unexpected null ordering option: " + + nullObASTExpr.getType()); } - // 3. 
Add Child Project Rel if needed, Generate Output RR, input Sel Rel - // for top constraining Sel - RelNode obInputRel = srcRel; - if (!newVCLst.isEmpty()) { - List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), - new Function() { - @Override - public RexNode apply(RelDataTypeField input) { - return new RexInputRef(input.getIndex(), input.getType()); - } - }); - RowResolver obSyntheticProjectRR = new RowResolver(); - if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message", - UnsupportedFeature.Duplicates_in_RR); - } - int vcolPos = inputRR.getRowSchema().getSignature().size(); - for (Pair astTypePair : vcASTTypePairs) { - obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( - SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null, - false)); - vcolPos++; - } - obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), - obSyntheticProjectRR, srcRel); + // 2.5 Add to field collations + fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder)); + } - if (outermostOB) { - if (!RowResolver.add(outputRR, inputRR)) { - throw new CalciteSemanticException( + // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel + // for top constraining Sel + RelNode obInputRel = srcRel; + if (!newVCLst.isEmpty()) { + List originalInputRefs = toRexNodeList(srcRel); + RowResolver obSyntheticProjectRR = new RowResolver(); + if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { + throw new CalciteSemanticException( "Duplicates detected when adding columns to RR: see previous message", UnsupportedFeature.Duplicates_in_RR); - } + } + int vcolPos = inputRR.getRowSchema().getSignature().size(); + for (Pair astTypePair : vcASTTypePairs) { + obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( + SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null, + false)); + vcolPos++; + } + obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), + obSyntheticProjectRR, srcRel); - } else { - if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message", - UnsupportedFeature.Duplicates_in_RR); - } + if (outermostOB) { + if (!RowResolver.add(outputRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } + } else { - if (!RowResolver.add(outputRR, inputRR)) { + if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message", - UnsupportedFeature.Duplicates_in_RR); + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } } + } else { + if (!RowResolver.add(outputRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); + } + } + return new OBLogicalPlanGenState(obInputRel, fieldCollations, selectOutputRR, outputRR, srcRel); + } - // 4. 
Construct SortRel - RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); - RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations)); - sortRel = new HiveSortLimit(cluster, traitSet, obInputRel, canonizedCollation, null, null); - - // 5. Update the maps - // NOTE: Output RR for SortRel is considered same as its input; we may - // end up not using VC that is present in sort rel. Also note that - // rowtype of sortrel is the type of it child; if child happens to be - // synthetic project that we introduced then that projectrel would - // contain the vc. - ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR); - relToHiveRR.put(sortRel, outputRR); - relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); + private ExprNodeDesc getOrderByExprNodeDesc( + RowResolver selectOutputRR, RowResolver inputRR, ASTNode obASTExpr, ASTNode ref) + throws SemanticException { + // first try to get it from select + // in case of udtf, selectOutputRR may be null. + ExprNodeDesc obExprNDesc = null; + if (selectOutputRR != null) { + try { + Map astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR); + obExprNDesc = astToExprNDescMap.get(ref); + } catch (SemanticException ex) { + // we can tolerate this as this is the previous behavior + LOG.debug("Can not find column in " + ref.getText() + ". The error msg is " + + ex.getMessage()); + } + } + // then try to get it from all + if (obExprNDesc == null) { + Map astToExprNDescMap = genAllExprNodeDesc(ref, inputRR); + obExprNDesc = astToExprNDescMap.get(ref); + } + if (obExprNDesc == null) { + throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); + } + return obExprNDesc; + } - if (selectOutputRR != null) { - List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), - new Function() { - @Override - public RexNode apply(RelDataTypeField input) { - return new RexInputRef(input.getIndex(), input.getType()); - } - }); - List selectedRefs = Lists.newArrayList(); - for (int index = 0; index < selectOutputRR.getColumnInfos().size(); index++) { - selectedRefs.add(originalInputRefs.get(index)); - } - // We need to add select since order by schema may have more columns than result schema. - returnRel = genSelectRelNode(selectedRefs, selectOutputRR, sortRel); - } else { - returnRel = sortRel; + // SELECT a, b FROM t ORDER BY 1 + private int getFieldIndexFromColumnNumber(RowResolver selectOutputRR, ASTNode ref) throws SemanticException { + int fieldIndex; + int pos = Integer.parseInt(ref.getText()); + if (pos > 0 && pos <= selectOutputRR.getColumnInfos().size()) { + // fieldIndex becomes so simple + // Note that pos starts from 1 while fieldIndex starts from 0; + fieldIndex = pos - 1; + } else { + throw new SemanticException( + ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg("Position alias: " + pos + + " does not exist\n" + "The Select List is indexed from 1 to " + + selectOutputRR.getColumnInfos().size())); + } + return fieldIndex; + } + + private List toRexNodeList(RelNode srcRel) { + return srcRel.getRowType().getFieldList().stream() + .map(input -> new RexInputRef(input.getIndex(), input.getType())) + .collect(Collectors.toList()); + } + + // 5. Update RR maps + // NOTE: Output RR for SortRel is considered same as its input; we may + // end up not using VC that is present in sort rel. 
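The getFieldIndexFromColumnNumber helper above performs the usual 1-based-to-0-based mapping for position aliases (e.g. SELECT a, b FROM t ORDER BY 1 sorts on field index 0) and rejects out-of-range positions. A standalone sketch of the same mapping, with hypothetical names, shown only for illustration:

final class PositionAliasSketch {
  // ORDER BY / SORT BY position aliases are 1-based in SQL; field indexes in the
  // resulting collation are 0-based. Out-of-range positions are rejected, mirroring
  // ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY in the patch.
  static int toFieldIndex(int positionAlias, int selectListSize) {
    if (positionAlias < 1 || positionAlias > selectListSize) {
      throw new IllegalArgumentException("Position alias: " + positionAlias
          + " does not exist; the select list is indexed from 1 to " + selectListSize);
    }
    return positionAlias - 1;
  }
}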
Also note that + // rowtype of sortrel is the type of it child; if child happens to be + // synthetic project that we introduced then that projectrel would + // contain the vc. + public RelNode endGenOBLogicalPlan(OBLogicalPlanGenState obLogicalPlanGenState, RelNode sortRel) + throws CalciteSemanticException { + + ImmutableMap hiveColNameCalcitePosMap = + buildHiveToCalciteColumnMap(obLogicalPlanGenState.getOutputRR()); + relToHiveRR.put(sortRel, obLogicalPlanGenState.getOutputRR()); + relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); + + if (obLogicalPlanGenState.getSelectOutputRR() != null) { + List originalInputRefs = toRexNodeList(obLogicalPlanGenState.getSrcRel()); + List selectedRefs = Lists.newArrayList(); + for (int index = 0; index < obLogicalPlanGenState.getSelectOutputRR().getColumnInfos().size(); index++) { + selectedRefs.add(originalInputRefs.get(index)); } + // We need to add select since order by schema may have more columns than result schema. + return genSelectRelNode(selectedRefs, obLogicalPlanGenState.getSelectOutputRR(), sortRel); + } else { + return sortRel; } - return returnRel; } private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { @@ -4771,6 +4840,7 @@ private void setQueryHints(QB qb) throws SemanticException { } else { String dest = qbp.getClauseNames().iterator().next(); ASTNode obAST = qbp.getOrderByForClause(dest); + ASTNode sbAST = qbp.getSortByForClause(dest); RowResolver originalRR = null; // We only support limited unselected column following by order by. @@ -4781,7 +4851,7 @@ private void setQueryHints(QB qb) throws SemanticException { // If DISTINCT is present, it is not possible to ORDER BY unselected // columns, and in fact adding all columns would change the behavior of // DISTINCT, so we bypass this logic. - if (obAST != null + if ((obAST != null || sbAST != null) && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI && !isAllColRefRewrite) { // 1. OB Expr sanity test @@ -5006,6 +5076,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, RelNode gbHavingRel = null; RelNode selectRel = null; RelNode obRel = null; + RelNode sbRel = null; RelNode limitRel = null; // First generate all the opInfos for the elements in the from clause @@ -5097,11 +5168,15 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, obRel = genOBLogicalPlan(qb, selPair, outerMostQB); srcRel = (obRel == null) ? srcRel : obRel; - // 7. Build Rel for Limit Clause + // 7. Build Rel for Sort By Clause + sbRel = genSBLogicalPlan(qb, selPair, outerMostQB); + srcRel = (sbRel == null) ? srcRel : sbRel; + + // 8. Build Rel for Limit Clause limitRel = genLimitLogicalPlan(qb, srcRel); srcRel = (limitRel == null) ? srcRel : limitRel; - // 8. Incase this QB corresponds to subquery then modify its RR to point + // 9. Incase this QB corresponds to subquery then modify its RR to point // to subquery alias. if (qb.getParseInfo().getAlias() != null) { RowResolver rr = this.relToHiveRR.get(srcRel); @@ -5244,6 +5319,47 @@ private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { } } + /** + * This class stores the partial results of Order/Sort by clause logical plan generation. 
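endGenOBLogicalPlan above re-adds a constraining SELECT when selectOutputRR is present because the sort's row type may carry extra virtual columns that the result schema must not expose; only the leading refs matching the SELECT output are kept. A generic sketch of that trimming step, with a hypothetical helper name, for illustration only:

import java.util.ArrayList;
import java.util.List;

final class TrimToResultSchemaSketch {
  // Keep only the first resultColumnCount refs; trailing virtual columns that were
  // introduced for ORDER BY / SORT BY evaluation are dropped from the result schema.
  static <T> List<T> trimToResultSchema(List<T> sortOutputRefs, int resultColumnCount) {
    return new ArrayList<>(sortOutputRefs.subList(0, resultColumnCount));
  }
}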
+ * See {@link CalcitePlannerAction#beginGenOBLogicalPlan}, {@link CalcitePlannerAction#endGenOBLogicalPlan} + */ + private static class OBLogicalPlanGenState { + private final RelNode obInputRel; + private final List canonizedCollation; + private final RowResolver selectOutputRR; + private final RowResolver outputRR; + private final RelNode srcRel; + + OBLogicalPlanGenState(RelNode obInputRel, List canonizedCollation, + RowResolver selectOutputRR, RowResolver outputRR, RelNode srcRel) { + this.obInputRel = obInputRel; + this.canonizedCollation = canonizedCollation; + this.selectOutputRR = selectOutputRR; + this.outputRR = outputRR; + this.srcRel = srcRel; + } + + public RelNode getObInputRel() { + return obInputRel; + } + + public List getFieldCollation() { + return canonizedCollation; + } + + public RowResolver getSelectOutputRR() { + return selectOutputRR; + } + + public RowResolver getOutputRR() { + return outputRR; + } + + public RelNode getSrcRel() { + return srcRel; + } + } + @Override protected Table getTableObjectByName(String tabName, boolean throwException) throws HiveException { String[] names = Utilities.getDbTableName(tabName); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index 31068cb8c3..7b25030442 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -52,7 +52,7 @@ * statements (since they are actually inserts) and then doing some patch up to make them work as * updates and deletes instead. */ -public abstract class RewriteSemanticAnalyzer extends SemanticAnalyzer { +public abstract class RewriteSemanticAnalyzer extends CalcitePlanner { protected static final Logger LOG = LoggerFactory.getLogger(RewriteSemanticAnalyzer.class); protected boolean useSuper = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 679ae2e1e6..500cf95702 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -811,20 +811,18 @@ private void replaceDefaultKeywordForUpdate(ASTNode selectExprs, Table targetTab ASTNode selectExpr = (ASTNode) selectExprs.getChild(i); if (selectExpr.getChildCount() == 1 && selectExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { //first child should be rowid - if (i == 0 && !selectExpr.getChild(0).getChild(0).getText().equals("ROW__ID")) { - throw new SemanticException("Unexpected element when replacing default keyword for UPDATE." 
- + " Expected ROW_ID, found: " + selectExpr.getChild(0).getChild(0).getText()); - } - else if (selectExpr.getChild(0).getChild(0).getText().toLowerCase().equals("default")) { - if (defaultConstraints == null) { - defaultConstraints = getDefaultConstraints(targetTable, null); - } - ASTNode newNode = getNodeReplacementforDefault(defaultConstraints.get(i - 1)); - // replace the node in place - selectExpr.replaceChildren(0, 0, newNode); - if (LOG.isDebugEnabled()) { - LOG.debug("DEFAULT keyword replacement - Inserted {} for table: {}", newNode.getText(), - targetTable.getTableName()); + if (i != 0 || selectExpr.getChild(0).getChild(0).getText().equals("ROW__ID")) { + if (selectExpr.getChild(0).getChild(0).getText().toLowerCase().equals("default")) { + if (defaultConstraints == null) { + defaultConstraints = getDefaultConstraints(targetTable, null); + } + ASTNode newNode = getNodeReplacementforDefault(defaultConstraints.get(i - 1)); + // replace the node in place + selectExpr.replaceChildren(0, 0, newNode); + if (LOG.isDebugEnabled()) { + LOG.debug("DEFAULT keyword replacement - Inserted {} for table: {}", newNode.getText(), + targetTable.getTableName()); + } } } } @@ -1809,6 +1807,7 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) break; case HiveParser.TOK_LIMIT: + queryProperties.setHasLimit(true); if (ast.getChildCount() == 2) { qbp.setDestLimit(ctx_1.dest, Integer.valueOf(ast.getChild(0).getText()), Integer.valueOf(ast.getChild(1).getText())); diff --git ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q index be50b69830..f228ccd609 100644 --- ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q +++ ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q @@ -64,7 +64,7 @@ set hive.cbo.enable=true; --although cbo is enabled, it will not succeed. 
-select key from v_n10 sort by key limit 10; +select key from v_n10 cluster by key limit 10; select key from (select key as key from src_autho_test_n9 union all select key from v_n10 cluster by key)subq diff --git ql/src/test/queries/clientpositive/sort.q ql/src/test/queries/clientpositive/sort.q index cab2712810..d8c89694e1 100644 --- ql/src/test/queries/clientpositive/sort.q +++ ql/src/test/queries/clientpositive/sort.q @@ -5,3 +5,8 @@ EXPLAIN SELECT x.* FROM SRC x SORT BY key; SELECT x.* FROM SRC x SORT BY key; + +EXPLAIN +SELECT x.* FROM SRC x SORT BY 1; + +SELECT x.* FROM SRC x SORT BY 1; diff --git ql/src/test/queries/clientpositive/sort_acid.q ql/src/test/queries/clientpositive/sort_acid.q new file mode 100644 index 0000000000..9d0b9d0663 --- /dev/null +++ ql/src/test/queries/clientpositive/sort_acid.q @@ -0,0 +1,35 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table acidtlb(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); +create table othertlb(c int, d int) stored as orc TBLPROPERTIES ('transactional'='true'); + +insert into acidtlb values(10,200),(30,500); +insert into othertlb values(10, 21),(30, 22),(60, 23),(70, 24),(80, 25); + + +explain cbo +select a, 6 as c, b from acidtlb sort by a, c, b; +select a, 6 as c, b from acidtlb sort by a, c, b; + +update acidtlb set b=777; +select * from acidtlb; + + +update acidtlb set b=350 +where a in (select a from acidtlb where a = 30); + +select * from acidtlb; + +update acidtlb set b=450 +where a in (select c from othertlb where c < 65); + +select * from acidtlb; + +delete from acidtlb +where a in ( + select a from acidtlb a + join othertlb o on a.a = o.c + where o.d = 21); + +select * from acidtlb; diff --git ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite.q.out ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite.q.out index 2b7ff65c7a..8e55c2c55a 100644 --- ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite.q.out +++ ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite.q.out @@ -19,4 +19,4 @@ POSTHOOK: Output: default@cmv_basetable POSTHOOK: Lineage: cmv_basetable.a SCRIPT [] POSTHOOK: Lineage: cmv_basetable.b SCRIPT [] POSTHOOK: Lineage: cmv_basetable.c SCRIPT [] -FAILED: SemanticException Cannot enable automatic rewriting for materialized view. Plan not optimized by CBO because the statement has sort by +FAILED: SemanticException Cannot enable automatic rewriting for materialized view. Unsupported RelNode type HiveSortExchange encountered in the query plan diff --git ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite_2.q.out ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite_2.q.out index 6850290412..b9de681a9b 100644 --- ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite_2.q.out +++ ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite_2.q.out @@ -33,4 +33,4 @@ PREHOOK: query: alter materialized view cmv_mat_view enable rewrite PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE PREHOOK: Input: default@cmv_mat_view PREHOOK: Output: default@cmv_mat_view -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask. org.apache.hadoop.hive.ql.metadata.HiveException: Cannot enable automatic rewriting for materialized view. Plan not optimized by CBO because the statement has sort by +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask. 
org.apache.hadoop.hive.ql.metadata.HiveException: Cannot enable rewriting for materialized view. Unsupported RelNode type HiveSortExchange encountered in the query plan diff --git ql/src/test/results/clientnegative/update_notnull_constraint.q.out ql/src/test/results/clientnegative/update_notnull_constraint.q.out index 86bfc67480..32905378e7 100644 --- ql/src/test/results/clientnegative/update_notnull_constraint.q.out +++ ql/src/test/results/clientnegative/update_notnull_constraint.q.out @@ -21,4 +21,9 @@ POSTHOOK: Output: default@acid_uami POSTHOOK: Lineage: acid_uami.de SCRIPT [] POSTHOOK: Lineage: acid_uami.i SCRIPT [] POSTHOOK: Lineage: acid_uami.vc SCRIPT [] -FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated! +PREHOOK: query: UPDATE acid_uami set de=null where i=1 +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_uami +PREHOOK: Output: default@acid_uami +#### A masked pattern was here #### +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask diff --git ql/src/test/results/clientpositive/acid_view_delete.q.out ql/src/test/results/clientpositive/acid_view_delete.q.out index 3771c3ba63..a1de850a17 100644 --- ql/src/test/results/clientpositive/acid_view_delete.q.out +++ ql/src/test/results/clientpositive/acid_view_delete.q.out @@ -94,7 +94,7 @@ POSTHOOK: Input: default@mydim #### A masked pattern was here #### 1 bob 95136 true 2 joe 70068 true -Warning: Shuffle Join JOIN[20][tables = [mydim, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: update mydim set is_current = false where mydim.key not in(select kv from updates_staging_view) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/authorization_view_disable_cbo_1.q.out ql/src/test/results/clientpositive/authorization_view_disable_cbo_1.q.out index b609982bb9..0b50d93acf 100644 --- ql/src/test/results/clientpositive/authorization_view_disable_cbo_1.q.out +++ ql/src/test/results/clientpositive/authorization_view_disable_cbo_1.q.out @@ -259,12 +259,12 @@ val_11 val_111 val_113 val_114 -PREHOOK: query: select key from v_n10 sort by key limit 10 +PREHOOK: query: select key from v_n10 cluster by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src_autho_test_n9 PREHOOK: Input: default@v_n10 #### A masked pattern was here #### -POSTHOOK: query: select key from v_n10 sort by key limit 10 +POSTHOOK: query: select key from v_n10 cluster by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_autho_test_n9 POSTHOOK: Input: default@v_n10 diff --git ql/src/test/results/clientpositive/auto_join0.q.out ql/src/test/results/clientpositive/auto_join0.q.out index 665cf28dea..12fe541ca1 100644 --- ql/src/test/results/clientpositive/auto_join0.q.out +++ ql/src/test/results/clientpositive/auto_join0.q.out @@ -1,6 +1,6 @@ -Warning: Map Join MAPJOIN[35][bigTable=?] in task 'Stage-5:MAPRED' is a cross product -Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Stage-6:MAPRED' is a cross product -Warning: Shuffle Join JOIN[12][tables = [src1, src2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[38][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[47][bigTable=?] 
in task 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -45,10 +45,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -78,11 +78,11 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - a:$INTNAME1 + $hdt$_0:$INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a:$INTNAME1 + $hdt$_0:$INTNAME1 TableScan HashTable Sink Operator keys: @@ -101,18 +101,22 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -144,11 +148,11 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - a:$INTNAME + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a:$INTNAME + $hdt$_0:$INTNAME TableScan HashTable Sink Operator keys: @@ -167,18 +171,22 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -207,28 +215,32 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -258,9 +270,9 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[35][bigTable=?] in task 'Stage-5:MAPRED' is a cross product -Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Stage-6:MAPRED' is a cross product -Warning: Shuffle Join JOIN[12][tables = [src1, src2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[38][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[47][bigTable=?] 
in task 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git ql/src/test/results/clientpositive/auto_join15.q.out ql/src/test/results/clientpositive/auto_join15.q.out index 6ea9db28f0..15fa1d263d 100644 --- ql/src/test/results/clientpositive/auto_join15.q.out +++ ql/src/test/results/clientpositive/auto_join15.q.out @@ -27,11 +27,11 @@ STAGE PLANS: Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - a:src1 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a:src1 + $hdt$_0:$hdt$_0:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -39,10 +39,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-2 Map Reduce @@ -54,29 +58,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/auto_join20.q.out ql/src/test/results/clientpositive/auto_join20.q.out index 6bbcb47084..6475ad2ee0 100644 --- ql/src/test/results/clientpositive/auto_join20.q.out +++ 
ql/src/test/results/clientpositive/auto_join20.q.out @@ -32,47 +32,55 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - a:src1 + $hdt$_0:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a:src1 + $hdt$_0:$hdt$_1:src1 TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-8 Map Reduce Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -83,19 +91,19 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - a:$INTNAME + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a:$INTNAME + $hdt$_0:$INTNAME TableScan HashTable Sink Operator filter predicates: 0 - 1 {(key < 20)} + 1 {_col2} keys: 0 _col0 (type: string) - 1 key (type: string) + 1 _col0 (type: string) Stage: Stage-6 Map Reduce @@ -103,33 +111,37 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 20)} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - 
Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - minReductionHashAggr: 0.99 - mode: hash + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -167,36 +179,40 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator 
condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 20)} + 1 {VALUE._col1} keys: 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + aggregations: sum(_col0) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 @@ -267,47 +283,55 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - a:src1 + $hdt$_0:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a:src1 + $hdt$_0:$hdt$_1:src1 TableScan alias: src1 - filterExpr: ((key < 15) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-8 Map Reduce Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key < 15)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column 
stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -318,19 +342,19 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - a:$INTNAME + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a:$INTNAME + $hdt$_0:$INTNAME TableScan HashTable Sink Operator filter predicates: 0 - 1 {(key < 20)} + 1 {_col2} keys: 0 _col0 (type: string) - 1 key (type: string) + 1 _col0 (type: string) Stage: Stage-6 Map Reduce @@ -338,33 +362,37 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 20)} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - minReductionHashAggr: 0.99 - mode: hash + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -402,36 +430,40 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 20)} + 1 {VALUE._col1} keys: 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + aggregations: sum(_col0) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/auto_join21.q.out ql/src/test/results/clientpositive/auto_join21.q.out index b0af07c93e..35a0f32fe2 100644 --- ql/src/test/results/clientpositive/auto_join21.q.out +++ ql/src/test/results/clientpositive/auto_join21.q.out @@ -22,49 +22,53 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src2 + $hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src2 + $hdt$_1:$hdt$_1:src1 TableScan - alias: src2 - filterExpr: (key > 10) (type: boolean) + alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -84,10 +88,10 @@ STAGE PLANS: HashTable Sink Operator filter predicates: 0 - 1 {(key < 10)} + 1 {_col2} keys: - 0 _col5 (type: string) - 1 key (type: string) + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-6 Map Reduce @@ -95,21 +99,21 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -128,16 +132,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -148,44 +152,44 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 10)} + 1 {VALUE._col1} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/auto_join23.q.out ql/src/test/results/clientpositive/auto_join23.q.out index 9dcfc1aa6f..e91161ad5c 100644 --- ql/src/test/results/clientpositive/auto_join23.q.out +++ ql/src/test/results/clientpositive/auto_join23.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY @@ -18,43 +18,47 @@ STAGE PLANS: Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:src1 TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 - 1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -84,7 +88,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/auto_join28.q.out ql/src/test/results/clientpositive/auto_join28.q.out index 6b27398f1e..e64539efd4 100644 --- ql/src/test/results/clientpositive/auto_join28.q.out +++ ql/src/test/results/clientpositive/auto_join28.q.out @@ -22,49 +22,53 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src2 + $hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src2 + $hdt$_1:$hdt$_1:src1 TableScan - alias: src2 - filterExpr: (key > 10) (type: boolean) + alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -84,10 +88,10 @@ STAGE PLANS: HashTable Sink Operator filter predicates: 0 - 1 {(key < 10)} + 1 {_col2} keys: - 0 _col5 (type: string) - 1 key (type: string) + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-6 Map Reduce @@ -95,21 +99,21 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num 
rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -128,16 +132,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -148,44 +152,44 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: 
COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 10)} + 1 {VALUE._col1} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -212,40 +216,48 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - src2 + $hdt$_1:src2 Fetch Operator limit: -1 - src3 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src2 + $hdt$_1:src2 TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) + filterExpr: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 10) (type: boolean) + predicate: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - src3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: 
COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col5 (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce @@ -253,34 +265,34 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -288,10 +300,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -322,40 +334,48 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - src1 + 
$hdt$_0:src1 Fetch Operator limit: -1 - src3 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:src1 TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - src3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col5 (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce @@ -363,34 +383,34 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -398,10 +418,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -437,49 +457,65 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:$hdt$_1:src1 TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-8 Map Reduce Map Operator Tree: TableScan alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) > 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: 
COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -499,10 +535,10 @@ STAGE PLANS: HashTable Sink Operator filter predicates: 0 - 1 {(key < 10)} + 1 {_col2} keys: - 0 _col5 (type: string) - 1 key (type: string) + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-6 Map Reduce @@ -510,21 +546,21 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -543,16 +579,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data 
size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -563,44 +599,44 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 10)} + 1 {VALUE._col1} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/auto_join29.q.out ql/src/test/results/clientpositive/auto_join29.q.out index ade39bdc1a..335b6dc52b 100644 --- ql/src/test/results/clientpositive/auto_join29.q.out +++ ql/src/test/results/clientpositive/auto_join29.q.out @@ -22,49 +22,53 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src2 + $hdt$_1:$hdt$_1:src1 Fetch 
Operator limit: -1 Alias -> Map Local Operator Tree: - src2 + $hdt$_1:$hdt$_1:src1 TableScan - alias: src2 - filterExpr: (key > 10) (type: boolean) + alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -84,10 +88,10 @@ STAGE PLANS: HashTable Sink Operator filter predicates: 0 - 1 {(key < 10)} + 1 {_col2} keys: - 0 _col5 (type: string) - 1 key (type: string) + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-6 Map Reduce @@ -95,21 +99,21 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -128,16 +132,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -148,44 +152,44 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 
{(KEY.reducesinkkey0 < 10)} + 1 {VALUE._col1} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -720,40 +724,48 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - src2 + $hdt$_1:src2 Fetch Operator limit: -1 - src3 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src2 + $hdt$_1:src2 TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) + filterExpr: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 10) (type: boolean) + predicate: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - src3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col5 (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce @@ -761,34 +773,34 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key 
(type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -796,10 +808,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1338,40 +1350,48 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:src1 Fetch Operator limit: -1 - src3 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:src1 TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - src3 + Select Operator + 
expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col5 (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce @@ -1379,34 +1399,34 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1414,10 +1434,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), 
KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1973,49 +1993,65 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:$hdt$_1:src1 TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-8 Map Reduce Map Operator Tree: TableScan alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) > 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2035,10 +2071,10 @@ STAGE PLANS: HashTable Sink Operator filter predicates: 0 - 1 {(key < 10)} + 1 {_col2} keys: - 0 _col5 (type: string) - 1 key (type: string) + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-6 Map Reduce @@ -2046,21 +2082,21 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -2079,16 +2115,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2099,44 +2135,44 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + 
value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 10)} + 1 {VALUE._col1} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -2683,73 +2719,79 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 - src3 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:$hdt$_0:src1 TableScan alias: src1 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - src3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 
$hdt$_2:src3 TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col5 (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2757,10 +2799,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 221 Data size: 102884 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2804,47 +2846,56 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_1:$hdt$_1:src1 TableScan alias: src1 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-8 Map Reduce Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2864,10 +2915,10 @@ STAGE PLANS: HashTable Sink Operator filter predicates: 0 - 1 {(key < 10)} + 1 {_col2} keys: - 0 
_col5 (type: string) - 1 key (type: string) + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-6 Map Reduce @@ -2875,21 +2926,21 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -2908,16 +2959,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2928,44 +2979,44 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: 
key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 10)} + 1 {VALUE._col1} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -3500,78 +3551,82 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - src2 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 - src3 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src2 + $hdt$_0:$hdt$_0:src1 TableScan - alias: src2 - filterExpr: (key > 10) (type: boolean) + alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - src3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE 
Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col5 (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col5 < 10) (type: boolean) - Statistics: Num rows: 254 Data size: 60876 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(_col0) < 10.0D) (type: boolean) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3579,10 +3634,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3602,6 +3657,7 @@ POSTHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY @@ -3621,79 +3677,86 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 - src3 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:$hdt$_0:src1 TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - src3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + $hdt$_2:src3 TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col5 (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) 
(type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Inner Join 0 to 1 + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3701,10 +3764,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3716,6 +3779,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[27][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -3765,73 +3829,82 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 - src3 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:$hdt$_0:src1 TableScan alias: src1 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - src3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col5 (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: 
string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(_col0) < 10.0D) (type: boolean) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3839,10 +3912,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/auto_join31.q.out ql/src/test/results/clientpositive/auto_join31.q.out index 73651d36f9..0c0e474220 100644 --- ql/src/test/results/clientpositive/auto_join31.q.out +++ ql/src/test/results/clientpositive/auto_join31.q.out @@ -33,26 +33,14 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - x:src + $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - z:src + $hdt$_0:$hdt$_2:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - x:src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - z:src + $hdt$_0:$hdt$_0:src TableScan alias: src filterExpr: key is not null (type: boolean) @@ -68,28 +56,45 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + $hdt$_0:$hdt$_2:src + TableScan + alias: src + filterExpr: key is not null 
(type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -98,17 +103,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col2, _col3 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out index 72e041e767..0035b52311 100644 --- ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out +++ ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[20][bigTable=?] 
in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -40,15 +40,15 @@ STAGE PLANS: Alias -> Map Local Operator Tree: a:cbo_t1:cbo_t3 TableScan - alias: cbo_t3 - filterExpr: (key < 10) (type: boolean) + alias: a:cbo_t1:cbo_t3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: key, value Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: @@ -59,15 +59,15 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: cbo_t3 - filterExpr: (key < 10) (type: boolean) + alias: a:cbo_t2:cbo_t3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: key, value Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -75,14 +75,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: key, value, key0, value0 Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE + expressions: hash(key,value,key0,value0) (type: int) + outputColumnNames: $f0 + Statistics: Num rows: 36 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) + aggregations: sum($f0) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 @@ -99,19 +99,15 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -119,7 +115,7 @@ 
STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -161,15 +157,15 @@ STAGE PLANS: Alias -> Map Local Operator Tree: a:cbo_t1:cbo_t3 TableScan - alias: cbo_t3 - filterExpr: (key < 10) (type: boolean) + alias: a:cbo_t1:cbo_t3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: key, value Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: @@ -180,15 +176,15 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: cbo_t3 - filterExpr: (key < 10) (type: boolean) + alias: a:cbo_t2:cbo_t3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: key, value Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -196,14 +192,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: key, value, key0, value0 Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE + expressions: hash(key,value,key0,value0) (type: int) + outputColumnNames: $f0 + Statistics: Num rows: 36 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) + aggregations: sum($f0) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 @@ -220,19 +216,15 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/correlationoptimizer14.q.out ql/src/test/results/clientpositive/correlationoptimizer14.q.out index b8d764841b..2962ea9a7b 100644 --- ql/src/test/results/clientpositive/correlationoptimizer14.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer14.q.out @@ -169,33 +169,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -238,33 +237,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: y - filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
string) outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/identity_project_remove_skip.q.out ql/src/test/results/clientpositive/identity_project_remove_skip.q.out index 1176c7c0d4..756309cf12 100644 --- ql/src/test/results/clientpositive/identity_project_remove_skip.q.out +++ ql/src/test/results/clientpositive/identity_project_remove_skip.q.out @@ -1,3 +1,4 @@ +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: explain select t2.* from @@ -29,11 +30,11 @@ STAGE PLANS: Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - t2:t1:src + $hdt$_0:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - t2:t1:src + $hdt$_0:src TableScan alias: src filterExpr: ((value = 'val_105') and (key = '105')) (type: boolean) @@ -42,11 +43,11 @@ STAGE PLANS: predicate: ((value = 'val_105') and (key = '105')) (type: boolean) Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 '105' (type: string) - 1 '105' (type: string) + 0 + 1 Stage: Stage-3 Map Reduce @@ -64,16 +65,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 '105' (type: string) - 1 '105' (type: string) + 0 + 1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: '105' (type: string), 'val_105' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -88,6 +89,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select t2.* from (select key,value from (select key,value from src) t1 sort by key) t2 diff --git ql/src/test/results/clientpositive/input_part7.q.out ql/src/test/results/clientpositive/input_part7.q.out index 15a388e88a..5ac50a4456 100644 --- ql/src/test/results/clientpositive/input_part7.q.out +++ ql/src/test/results/clientpositive/input_part7.q.out @@ -32,54 +32,54 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: ((ds = '2008-04-08') and (key < 100)) (type: boolean) + filterExpr: ((UDFToDouble(key) < 100.0D) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) Statistics: Num rows: 333 Data size: 120546 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 333 Data size: 120546 Basic stats: COMPLETE Column stats: COMPLETE Union Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + null sort order: zzz + sort order: +++ + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false TableScan alias: y - filterExpr: ((ds = '2008-04-08') and (key < 100)) (type: boolean) + filterExpr: ((UDFToDouble(key) < 100.0D) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) Statistics: Num rows: 333 Data size: 120546 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 333 Data size: 120546 Basic stats: COMPLETE Column stats: COMPLETE Union Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: 
string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + null sort order: zzz + sort order: +++ + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Path -> Alias: @@ -184,12 +184,12 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a:__u1-subquery1:x, a:__u1-subquery2:y] - /srcpart/ds=2008-04-08/hr=12 [a:__u1-subquery1:x, a:__u1-subquery2:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0-subquery1:x, $hdt$_0-subquery2:y] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0-subquery1:x, $hdt$_0-subquery2:y] Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey3 (type: string) + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/join0.q.out ql/src/test/results/clientpositive/join0.q.out index cda0f3f0a5..9f2e1814bb 100644 --- ql/src/test/results/clientpositive/join0.q.out +++ ql/src/test/results/clientpositive/join0.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -30,10 +30,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -46,10 +46,10 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN FORMATTED SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -125,8 +125,8 @@ SELECT src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked 
pattern was here #### -{"cboInfo":"Plan not optimized by CBO because the statement has sort by","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","filterExpr:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_13","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","columnExprMap:":{"_col0":"key","_col1":"value"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_2","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0","VALUE._col1":"_col1"},"null sort order:":"","sort order:":"","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_6"}}}}}}}},{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","filterExpr:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_3","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_14","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","columnExprMap:":{"_col0":"key","_col1":"value"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0","VALUE._col1":"_col1"},"null sort order:":"","sort order:":"","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_7"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"columnExprMap:":{"_col0":"0:VALUE._col0","_col1":"0:VALUE._col1","_col2":"1:VALUE._col0","_col3":"1:VALUE._col1"},"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"JOIN_8","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_15"}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"columns:":["_col0","_col1","_col2","_col3"],"OperatorId:":"TS_16","children":{"Reduce Output Operator":{"columnExprMap:":{"KEY.reducesinkkey0":"_col0","KEY.reducesinkkey1":"_col1","KEY.reducesinkkey2":"_col2","KEY.reducesinkkey3":"_col3"},"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: 
string)","null sort order:":"zzzz","sort order:":"++++","Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"RS_10"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","columnExprMap:":{"_col0":"KEY.reducesinkkey0","_col1":"KEY.reducesinkkey1","_col2":"KEY.reducesinkkey2","_col3":"KEY.reducesinkkey3"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_11","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_12"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}} -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"key\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"value\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"key\",\n \"ndv\": 316\n },\n {\n \"name\": \"value\",\n \"ndv\": 307\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": {\n \"name\": \"<\",\n \"kind\": \"LESS_THAN\",\n \"syntax\": \"BINARY\"\n },\n \"operands\": [\n {\n \"op\": {\n \"name\": \"CAST\",\n \"kind\": \"CAST\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"type\": {\n \"type\": \"DOUBLE\",\n \"nullable\": true\n }\n },\n {\n \"literal\": 10,\n \"type\": {\n \"type\": \"DOUBLE\",\n \"nullable\": false\n }\n }\n ]\n },\n \"rowCount\": 166.66666666666666\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"key\",\n \"value\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ],\n \"rowCount\": 166.66666666666666\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"literal\": true,\n \"type\": {\n \"type\": \"BOOLEAN\",\n \"nullable\": false\n }\n },\n 
\"joinType\": \"inner\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"2\",\n \"2\"\n ],\n \"rowCount\": 27777.777777777774\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"key\",\n \"value\",\n \"key0\",\n \"value0\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n }\n ],\n \"rowCount\": 27777.777777777774\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange\",\n \"distribution\": {\n \"type\": \"ANY\"\n },\n \"collation\": [\n {\n \"field\": 0,\n \"direction\": \"ASCENDING\",\n \"nulls\": \"LAST\"\n },\n {\n \"field\": 1,\n \"direction\": \"ASCENDING\",\n \"nulls\": \"LAST\"\n },\n {\n \"field\": 2,\n \"direction\": \"ASCENDING\",\n \"nulls\": \"LAST\"\n },\n {\n \"field\": 3,\n \"direction\": \"ASCENDING\",\n \"nulls\": \"LAST\"\n }\n ],\n \"rowCount\": 27777.777777777774\n }\n ]\n}","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","filterExpr:":"(UDFToDouble(key) < 10.0D) (type: boolean)","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Filter Operator":{"predicate:":"(UDFToDouble(key) < 10.0D) (type: boolean)","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_13","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","columnExprMap:":{"_col0":"key","_col1":"value"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_2","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0","VALUE._col1":"_col1"},"null sort order:":"","sort order:":"","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_6"}}}}}}}},{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","filterExpr:":"(UDFToDouble(key) < 10.0D) (type: boolean)","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_3","children":{"Filter Operator":{"predicate:":"(UDFToDouble(key) < 10.0D) (type: boolean)","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_14","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","columnExprMap:":{"_col0":"key","_col1":"value"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0","VALUE._col1":"_col1"},"null sort order:":"","sort order:":"","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: string), _col1 (type: 
string)","OperatorId:":"RS_7"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"columnExprMap:":{"_col0":"0:VALUE._col0","_col1":"0:VALUE._col1","_col2":"1:VALUE._col0","_col3":"1:VALUE._col1"},"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"JOIN_8","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_15"}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"columns:":["_col0","_col1","_col2","_col3"],"OperatorId:":"TS_16","children":{"Reduce Output Operator":{"columnExprMap:":{"KEY.reducesinkkey0":"_col0","KEY.reducesinkkey1":"_col1","KEY.reducesinkkey2":"_col2","KEY.reducesinkkey3":"_col3"},"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","null sort order:":"zzzz","sort order:":"++++","Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"RS_10"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","columnExprMap:":{"_col0":"KEY.reducesinkkey0","_col1":"KEY.reducesinkkey1","_col2":"KEY.reducesinkkey2","_col3":"KEY.reducesinkkey3"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_11","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_12"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}} +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM (SELECT * FROM src WHERE src.key < 10) src1 diff --git ql/src/test/results/clientpositive/join15.q.out ql/src/test/results/clientpositive/join15.q.out index 664f2e0614..bbde586a2a 100644 --- ql/src/test/results/clientpositive/join15.q.out +++ ql/src/test/results/clientpositive/join15.q.out @@ -24,13 +24,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: src2 filterExpr: key is not null (type: boolean) @@ -38,32 +42,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git ql/src/test/results/clientpositive/join20.q.out ql/src/test/results/clientpositive/join20.q.out index 9327cf453a..02f71ddae0 100644 --- ql/src/test/results/clientpositive/join20.q.out +++ ql/src/test/results/clientpositive/join20.q.out @@ -22,41 +22,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + 
value expressions: _col1 (type: string) TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -73,40 +81,40 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 20)} + 1 {VALUE._col1} keys: 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: 
string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -116,16 +124,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -719,41 +727,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: ((key < 15) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: 
src2 - filterExpr: ((key < 10) and (key < 15)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -770,40 +786,40 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 20)} + 1 {VALUE._col1} keys: 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - 
Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -813,16 +829,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/join21.q.out ql/src/test/results/clientpositive/join21.q.out index d25eb5cb09..58663573a9 100644 --- ql/src/test/results/clientpositive/join21.q.out +++ ql/src/test/results/clientpositive/join21.q.out @@ -21,39 +21,46 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: src2 - filterExpr: (key 
> 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 + Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -66,44 +73,44 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 10)} + 1 {VALUE._col1} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, 
_col5, _col6, _col10, _col11 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -113,16 +120,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/join23.q.out ql/src/test/results/clientpositive/join23.q.out index 4f8f0d8c14..23b4207b4e 100644 --- ql/src/test/results/clientpositive/join23.q.out +++ ql/src/test/results/clientpositive/join23.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY @@ -20,28 +20,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: 
COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -49,18 +57,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -91,7 +95,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/join40.q.out ql/src/test/results/clientpositive/join40.q.out index b0942ea3e7..f33ff5a0d6 100644 --- ql/src/test/results/clientpositive/join40.q.out +++ ql/src/test/results/clientpositive/join40.q.out @@ -1789,41 +1789,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter 
Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1840,40 +1848,40 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 20)} + 1 {VALUE._col1} keys: 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1883,16 +1891,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2486,41 +2494,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: ((key < 15) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: 
+ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: src2 - filterExpr: ((key < 10) and (key < 15)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -2537,40 +2553,40 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string), 
(UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 - 1 {(KEY.reducesinkkey0 < 20)} + 1 {VALUE._col1} keys: 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2580,16 +2596,16 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 699398bc9d..aeeffa5e20 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -137,21 +137,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: 
srcpart_acid - filterExpr: (((UDFToInteger(key) = 413) or (UDFToInteger(key) = 43)) and (hr = '11')) (type: boolean) + filterExpr: ((UDFToInteger(key)) IN (413, 43) and (hr = '11')) (type: boolean) Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: ((UDFToInteger(key) = 413) or (UDFToInteger(key) = 43)) (type: boolean) - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1000 Data size: 617000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1000 Data size: 617000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -161,10 +161,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1000 Data size: 617000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 617000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -812,21 +812,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_acidb - filterExpr: (((UDFToInteger(key) = 413) or (UDFToInteger(key) = 43)) and (hr = '11')) (type: boolean) + filterExpr: ((UDFToInteger(key)) IN (413, 43) and (hr = '11')) (type: boolean) Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: ((UDFToInteger(key) = 413) or (UDFToInteger(key) = 43)) (type: boolean) - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1000 Data size: 617000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1000 Data size: 617000 Basic stats: COMPLETE Column 
stats: PARTIAL + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -836,10 +836,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1000 Data size: 617000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 617000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1371,19 +1371,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 5:int, val 413)(children: CastStringToLong(col 0:string) -> 5:int), FilterLongColEqualLongScalar(col 6:int, val 43)(children: CastStringToLong(col 0:string) -> 6:int)) + predicateExpression: FilterLongColumnInList(col 5:int, values [413, 43])(children: CastStringToLong(col 0:string) -> 5:int) Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [4, 0, 7, 2] - selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 7:string + projectedOutputColumnNums: [4, 0, 6, 2] + selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 6:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumns: 5:int - valueColumns: 0:string, 7:string, 2:string + valueColumns: 0:string, 6:string, 2:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1402,7 +1402,7 @@ STAGE PLANS: neededVirtualColumns: [ROWID] partitionColumnCount: 2 partitionColumns: ds:string, hr:string - scratchColumnTypeNames: [bigint, bigint, string] + scratchColumnTypeNames: [bigint, string] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1758,45 +1758,43 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Reducer 3 (SIMPLE_EDGE) - Reducer 7 <- Reducer 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 
(SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] - Select Vectorization: - className: VectorSelectOperator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string)) + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 4:struct Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1805,27 +1803,38 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] dataColumns: key:string, value:string + neededVirtualColumns: [ROWID] partitionColumnCount: 2 partitionColumns: ds:string, hr:string scratchColumnTypeNames: [] - Map 8 + Map 7 Map Operator Tree: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 2:string, 3:string, 0:string, 1:string + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 4:struct + projectedOutputColumnNums: [0, 1, 2, 3] + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:string, 3:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1834,78 +1843,21 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] dataColumns: key:string, value:string - neededVirtualColumns: [ROWID] partitionColumnCount: 2 partitionColumns: ds:string, hr:string scratchColumnTypeNames: [] Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: zzzz - reduceColumnSortOrder: ++++ - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:string, KEY._col3:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 0:string, 1:string, 2:string, 3:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - App Master Event Vectorization: - className: VectorAppMasterEventOperator - native: true - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 1:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - App Master Event Vectorization: - className: VectorAppMasterEventOperator - native: true - Reducer 3 MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1928,6 +1880,14 @@ STAGE PLANS: File Sink Vectorization: className: VectorFileSinkOperator native: false + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1959,14 +1919,66 @@ STAGE PLANS: notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported vectorized: false Reduce Operator Tree: - Reducer 7 - Execution mode: llap + Reducer 8 + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF 
compute_stats not supported - vectorized: false + reduceColumnNullOrder: zzzz + reduceColumnSortOrder: ++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:string, KEY._col3:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 0, 1] + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:string, 3:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + App Master Event Vectorization: + className: VectorAppMasterEventOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 3:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + App Master Event Vectorization: + className: VectorAppMasterEventOperator + native: true Stage: Stage-5 @@ -1974,15 +1986,15 @@ STAGE PLANS: Stage: Stage-6 - Stage: Stage-2 + Stage: Stage-1 Stage: Stage-7 - Stage: Stage-3 + Stage: Stage-2 Stage: Stage-8 - Stage: Stage-1 + Stage: Stage-3 Stage: Stage-9 @@ -2230,19 +2242,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 5:int, val 413)(children: CastStringToLong(col 0:string) -> 5:int), FilterLongColEqualLongScalar(col 6:int, val 43)(children: CastStringToLong(col 0:string) -> 6:int)) + predicateExpression: FilterLongColumnInList(col 5:int, values [413, 43])(children: CastStringToLong(col 0:string) -> 5:int) Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [4, 0, 7, 2] - selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 7:string + projectedOutputColumnNums: [4, 0, 6, 2] + selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 6:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumns: 5:int - valueColumns: 0:string, 7:string, 2:string + valueColumns: 0:string, 6:string, 2:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2261,7 +2273,7 @@ STAGE PLANS: 
neededVirtualColumns: [ROWID] partitionColumnCount: 2 partitionColumns: ds:string, hr:string - scratchColumnTypeNames: [bigint, bigint, string] + scratchColumnTypeNames: [bigint, string] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -2617,46 +2629,44 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Reducer 3 (SIMPLE_EDGE) - Reducer 7 <- Reducer 3 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) + Reducer 7 <- Reducer 2 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] - Select Vectorization: - className: VectorSelectOperator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string)) + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 4:struct Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2665,27 +2675,38 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] dataColumns: key:string, value:string + neededVirtualColumns: [ROWID] partitionColumnCount: 2 partitionColumns: ds:string, hr:string scratchColumnTypeNames: [] - Map 9 + Map 8 Map Operator Tree: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 2:string, 3:string, 0:string, 1:string + Select Vectorization: + className: VectorSelectOperator native: 
true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 4:struct + projectedOutputColumnNums: [0, 1, 2, 3] + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:string, 3:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2694,78 +2715,21 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] dataColumns: key:string, value:string - neededVirtualColumns: [ROWID] partitionColumnCount: 2 partitionColumns: ds:string, hr:string scratchColumnTypeNames: [] Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: zzzz - reduceColumnSortOrder: ++++ - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:string, KEY._col3:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 0:string, 1:string, 2:string, 3:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - App Master Event Vectorization: - className: VectorAppMasterEventOperator - native: true - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 1:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - App Master Event Vectorization: - className: VectorAppMasterEventOperator - native: true - Reducer 3 MergeJoin Vectorization: enabled: false enableConditionsNotMet: 
Vectorizing MergeJoin Supported IS false - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2788,7 +2752,23 @@ STAGE PLANS: File Sink Vectorization: className: VectorFileSinkOperator native: false + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: Reducer 5 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2811,30 +2791,74 @@ STAGE PLANS: File Sink Vectorization: className: VectorFileSinkOperator native: false - Reducer 6 - Execution mode: llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported - vectorized: false - Reduce Operator Tree: Reducer 7 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported vectorized: false Reduce Operator Tree: - Reducer 8 - Execution mode: llap + Reducer 9 + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported - vectorized: false + reduceColumnNullOrder: zzzz + reduceColumnSortOrder: ++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:string, KEY._col3:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 0, 1] + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:string, 3:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Group By Vectorization: + className: VectorGroupByOperator + 
groupByMode: HASH + keyExpressions: col 2:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + App Master Event Vectorization: + className: VectorAppMasterEventOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 3:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + App Master Event Vectorization: + className: VectorAppMasterEventOperator + native: true Stage: Stage-5 @@ -2842,15 +2866,15 @@ STAGE PLANS: Stage: Stage-6 - Stage: Stage-2 + Stage: Stage-1 Stage: Stage-7 - Stage: Stage-3 + Stage: Stage-2 Stage: Stage-8 - Stage: Stage-1 + Stage: Stage-3 Stage: Stage-9 diff --git ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out index 2e43110b35..ef837da4e3 100644 --- ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out +++ ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out @@ -585,21 +585,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_orc_bucketed - filterExpr: ((b = 4294967363L) and (t < 100Y)) (type: boolean) + filterExpr: ((t < 100Y) and (b = 4294967363L)) (type: boolean) Statistics: Num rows: 2098 Data size: 706986 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((b = 4294967363L) and (t < 100Y)) (type: boolean) - Statistics: Num rows: 6 Data size: 2022 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((t < 100Y) and (b = 4294967363L)) (type: boolean) + Statistics: Num rows: 7 Data size: 2359 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6 Data size: 2502 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2919 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 6 Data size: 2502 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2919 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col5 (type: float), _col6 (type: double), _col7 (type: boolean), _col8 (type: string), _col9 (type: timestamp), _col10 (type: decimal(4,2)), _col11 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -607,12 +607,12 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363L (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary) + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363L (type: bigint), VALUE._col2 (type: 
float), VALUE._col3 (type: double), VALUE._col4 (type: boolean), VALUE._col5 (type: string), VALUE._col6 (type: timestamp), VALUE._col7 (type: decimal(4,2)), VALUE._col8 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6 Data size: 2502 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2919 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 2502 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2919 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git ql/src/test/results/clientpositive/llap/auto_join0.q.out ql/src/test/results/clientpositive/llap/auto_join0.q.out index 31b776fd5d..4fbd217a62 100644 --- ql/src/test/results/clientpositive/llap/auto_join0.q.out +++ ql/src/test/results/clientpositive/llap/auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[22][tables = [src1, src2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -44,10 +44,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -88,17 +88,21 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -133,7 +137,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[22][tables = [src1, src2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git ql/src/test/results/clientpositive/llap/auto_join21.q.out ql/src/test/results/clientpositive/llap/auto_join21.q.out index df866d8ce6..21e5e5e09b 100644 
--- ql/src/test/results/clientpositive/llap/auto_join21.q.out +++ ql/src/test/results/clientpositive/llap/auto_join21.q.out @@ -27,44 +27,51 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col5 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -72,28 +79,28 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 
Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 0 Map 1 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 4 @@ -102,10 +109,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/auto_join29.q.out ql/src/test/results/clientpositive/llap/auto_join29.q.out index 9c3a0b1388..dc560dcd49 100644 --- ql/src/test/results/clientpositive/llap/auto_join29.q.out +++ ql/src/test/results/clientpositive/llap/auto_join29.q.out @@ -27,44 +27,51 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col5 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE 
+ Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -72,28 +79,28 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 0 Map 1 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: 
zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 4 @@ -102,10 +109,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -653,74 +660,82 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 1 Map 3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 1 Map 4 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) + filterExpr: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 500 Data 
size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 10) (type: boolean) + predicate: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -729,10 +744,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1279,18 +1294,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key 
(type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 @@ -1298,56 +1317,60 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 Map 1 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 1 Map 4 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + 
expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 @@ -1356,10 +1379,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1910,54 +1933,70 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src1 - filterExpr: (key < 10) (type: boolean) + alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) > 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: 
Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan - alias: src2 + alias: src1 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col5 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -1965,28 +2004,28 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 2 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 0 Map 1 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, 
llap LLAP IO: no inputs Reducer 4 @@ -1995,10 +2034,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2557,76 +2596,82 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 1 Map 3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 1 Map 4 + Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -2635,10 +2680,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2677,8 +2722,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2686,46 +2731,55 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - 
filterExpr: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -2733,28 +2787,28 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key < 10)} - keys: - 0 _col5 (type: string) - 1 key (type: 
string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 0 Map 2 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 4 @@ -2763,10 +2817,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3305,8 +3359,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3314,89 +3368,96 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(key < 10)} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col5 < 10) (type: boolean) - Statistics: Num rows: 254 Data size: 60876 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 4 - Statistics: Num rows: 401 Data size: 
184586 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(_col0) < 10.0D) (type: boolean) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 4 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3416,6 +3477,7 @@ POSTHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +Warning: Map Join MAPJOIN[35][bigTable=?] 
in task 'Map 2' is a cross product PREHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY @@ -3443,79 +3505,84 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(key > 10)} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Inner Join 0 to 1 + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 Map 1 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 1 Map 4 + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 @@ -3524,10 +3591,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3539,6 +3606,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[35][bigTable=?] 
in task 'Map 2' is a cross product PREHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -3588,97 +3656,105 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src1 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col5 (type: string) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(_col0) < 10.0D) (type: boolean) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1 Data size: 356 
Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 4 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col5 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
ql/src/test/results/clientpositive/llap/auto_join30.q.out ql/src/test/results/clientpositive/llap/auto_join30.q.out index d81dadb799..0a74992e85 100644 --- ql/src/test/results/clientpositive/llap/auto_join30.q.out +++ ql/src/test/results/clientpositive/llap/auto_join30.q.out @@ -71,21 +71,25 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 @@ -177,39 +181,47 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + 
Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -290,17 +302,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 2 @@ -318,21 +334,25 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 @@ -411,8 +431,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -428,37 +448,16 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0 input vertices: - 0 Map 1 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -469,20 +468,24 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -503,7 +506,29 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -585,8 +610,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -602,40 +627,19 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: 
string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0 input vertices: - 0 Map 1 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -643,37 +647,67 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 
500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -774,7 +808,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE @@ -784,56 +818,68 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 1 Map 4 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: 
boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -927,46 +973,54 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 2 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output 
Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -984,21 +1038,25 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 4 @@ -1092,45 +1150,53 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -1148,21 +1214,25 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 2 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 4 diff --git ql/src/test/results/clientpositive/llap/check_constraint.q.out ql/src/test/results/clientpositive/llap/check_constraint.q.out index b4acc55f15..3ef0744c7b 100644 --- ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -2070,10 +2070,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n0 - filterExpr: ((de) IN (103, 119) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean) + filterExpr: (de) IN (103, 119) (type: boolean) Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((de) IN (103, 119) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean) + predicate: (de) IN (103, 119) (type: boolean) Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) @@ -2095,15 +2095,18 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 893.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Filter Operator + predicate: enforce_constraint((_col2 is not null and (_col2 >= CAST( _col1 AS decimal(5,2))) is not false)) (type: boolean) Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - table: 
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_uami_n0 - Write Type: UPDATE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_uami_n0 + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -2199,18 +2202,25 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 893.14 (type: decimal(5,2)), 'apache_hive' (type: varchar(128)) + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 893.14 (type: decimal(5,2)), 'apache_hive' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Filter Operator + predicate: enforce_constraint(_col2 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_uami_n0 - Write Type: UPDATE + Select Operator + expressions: _col0 (type: struct), _col1 (type: int), _col2 (type: decimal(5,2)), CAST( _col3 AS varchar(128)) (type: varchar(128)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_uami_n0 + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -2341,8 +2351,8 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-3 @@ -2356,95 +2366,114 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string), ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 6 Map Operator Tree: TableScan alias: s Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), a1 (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data 
size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: a1 (type: string), value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: t + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 5) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 3) and (_col6 >= 5) and enforce_constraint(((_col0 > 0) and ((_col0 < 100) or (_col0 = 5))) is not false) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: string) - Filter Operator - predicate: (enforce_constraint((_col7 is not null and ((_col6 > 0) and ((_col6 < 100) or (_col6 = 5))) is not false)) and _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col4 (type: string), _col5 (type: struct), _col2 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint((_col1 is not null and ((_col0 > 0) and ((_col0 < 100) or (_col0 = 5))) is not false)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 3) and (_col1 >= 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col5 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: string) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -2455,34 +2484,37 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.tmerge - Write Type: UPDATE + Write Type: INSERT Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.tmerge - Write Type: INSERT + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint((_col2 is not null and ((_col1 > 0) and ((_col1 < 100) or (_col1 = 5))) is not false)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tmerge + Write Type: UPDATE Stage: Stage-4 Dependency Collection @@ -2498,7 +2530,7 @@ STAGE PLANS: name: default.tmerge Write Type: DELETE - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false @@ -2507,9 +2539,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.tmerge - Write Type: UPDATE + Write Type: INSERT - Stage: Stage-1 + Stage: Stage-2 Move Operator tables: replace: false @@ -2518,7 +2550,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.tmerge - Write Type: INSERT + Write Type: UPDATE PREHOOK: query: explain MERGE INTO tmerge as t using nonacid as s ON t.key = s.key WHEN MATCHED AND s.key < 5 THEN DELETE @@ -2546,9 +2578,9 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 + Stage-1 depends on 
stages: Stage-5 Stage-2 depends on stages: Stage-5 Stage-3 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-4 @@ -2563,116 +2595,135 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string), ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 7 Map Operator Tree: TableScan alias: s Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), a1 (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: a1 (type: string), value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: t + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 5) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 3) and (_col6 >= 5) and enforce_constraint(((_col0 > 0) and ((_col0 < 100) or 
(_col0 = 5))) is not false) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: string) - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col4 (type: string), _col5 (type: struct), _col2 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: (enforce_constraint((_col7 is not null and ((_col6 > 0) and ((_col6 < 100) or (_col6 = 5))) is not false)) and _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: 
COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint((_col1 is not null and ((_col0 > 0) and ((_col0 < 100) or (_col0 = 5))) is not false)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 3) and (_col1 >= 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col5 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: string) + Filter Operator + predicate: (_col5 = _col1) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col3 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -2683,19 +2734,38 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.tmerge - Write Type: UPDATE + Write Type: INSERT Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint((_col2 is not null and ((_col1 > 0) and ((_col1 < 100) or (_col1 = 5))) is not false)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tmerge + Write Type: UPDATE + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2703,38 +2773,22 @@ STAGE PLANS: keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cardinality_violation(_col0) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.tmerge - Write Type: INSERT Stage: Stage-5 Dependency Collection @@ -2750,6 +2804,17 @@ STAGE PLANS: name: default.tmerge Write Type: DELETE + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tmerge + Write Type: INSERT + Stage: Stage-2 Move Operator tables: @@ -2771,17 
+2836,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table - Stage: Stage-1 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.tmerge - Write Type: INSERT - PREHOOK: query: explain MERGE INTO tmerge as t using nonacid as s ON t.key = s.key WHEN MATCHED AND s.key < 5 THEN DELETE WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.a1, s.value) @@ -2804,8 +2858,8 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-3 @@ -2819,102 +2873,121 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 6 Map Operator Tree: TableScan alias: s Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), a1 (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: a1 (type: string), value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: t + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 5) and (_col0 = _col6)) (type: 
boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col4 (type: struct), _col2 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col4 = _col1) and (_col1 < 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: (enforce_constraint((_col7 is not null and ((_col6 > 0) and ((_col6 < 100) or (_col6 = 5))) is not false)) and _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col4 is null (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint((_col1 is not null and ((_col0 > 0) and ((_col0 < 100) or (_col0 = 5))) is not false)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Filter Operator + predicate: (_col4 = _col1) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -2922,6 +2995,22 @@ STAGE PLANS: name: default.tmerge Write Type: DELETE Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tmerge + Write Type: INSERT + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2929,38 +3018,22 @@ STAGE PLANS: keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cardinality_violation(_col0) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: 
COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.tmerge - Write Type: INSERT Stage: Stage-4 Dependency Collection @@ -2976,16 +3049,6 @@ STAGE PLANS: name: default.tmerge Write Type: DELETE - Stage: Stage-2 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Stage: Stage-1 Move Operator tables: @@ -2997,6 +3060,16 @@ STAGE PLANS: name: default.tmerge Write Type: INSERT + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + PREHOOK: query: DROP TABLE tmerge PREHOOK: type: DROPTABLE PREHOOK: Input: default@tmerge diff --git ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index 2522677d95..0b5808269f 100644 --- ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -41,158 +41,170 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 9 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Map 9 <- Reducer 8 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Reducer 2 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 2 (SIMPLE_EDGE) + Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t - filterExpr: (a BETWEEN DynamicValue(RS_3_s_a_min) AND DynamicValue(RS_3_s_a_max) and in_bloom_filter(a, 
DynamicValue(RS_3_s_a_bloom_filter))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a BETWEEN DynamicValue(RS_3_s_a_min) AND DynamicValue(RS_3_s_a_max) and in_bloom_filter(a, DynamicValue(RS_3_s_a_bloom_filter))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + alias: s + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: a (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 8 + LLAP IO: all inputs + Map 9 Map Operator Tree: TableScan - alias: s - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: b (type: int) - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + alias: t + filterExpr: (a is not null and a BETWEEN DynamicValue(RS_5_s_a_min) AND DynamicValue(RS_5_s_a_max) and in_bloom_filter(a, DynamicValue(RS_5_s_a_bloom_filter))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a is not null and a BETWEEN DynamicValue(RS_5_s_a_min) AND DynamicValue(RS_5_s_a_max) and in_bloom_filter(a, DynamicValue(RS_5_s_a_bloom_filter))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: 
_col0 (type: int), _col1 (type: int), _col2 (type: binary) + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) Execution mode: vectorized, llap - LLAP IO: all inputs + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 > 8) and (_col0 = _col5)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 <= 8) and (_col0 = _col5)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col4 (type: struct) - minReductionHashAggr: 0.99 - mode: hash + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col1 (type: int), _col2 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col2 = _col3) and (_col3 > 8)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: ((_col2 = _col3) and (_col3 <= 8)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col2 = _col3) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -200,55 +212,15 @@ STAGE PLANS: name: default.acidtbl Write Type: DELETE Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int) - outputColumnNames: _col0, 
_col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acidtbl - Write Type: UPDATE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -258,7 +230,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: a, b - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') minReductionHashAggr: 0.99 @@ -270,7 +242,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 7 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -285,7 +257,47 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -317,7 +329,7 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false @@ -326,44 +338,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl - Write Type: UPDATE + Write Type: INSERT Stage: Stage-7 Stats Work Basic Stats Work: - Stage: Stage-3 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl - Stage: Stage-1 + Stage: Stage-3 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acidtbl - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-9 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b - Column Types: int, int - Table: default.acidtbl PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) @@ -399,17 +411,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - filterExpr: (a BETWEEN DynamicValue(RS_3_s_a_min) AND DynamicValue(RS_3_s_a_max) and in_bloom_filter(a, DynamicValue(RS_3_s_a_bloom_filter))) (type: boolean) + filterExpr: (a is not null and a BETWEEN DynamicValue(RS_6_s_a_min) AND DynamicValue(RS_6_s_a_max) and in_bloom_filter(a, DynamicValue(RS_6_s_a_bloom_filter))) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (a BETWEEN DynamicValue(RS_3_s_a_min) AND 
DynamicValue(RS_3_s_a_max) and in_bloom_filter(a, DynamicValue(RS_3_s_a_bloom_filter))) (type: boolean) + predicate: (a is not null and a BETWEEN DynamicValue(RS_6_s_a_min) AND DynamicValue(RS_6_s_a_max) and in_bloom_filter(a, DynamicValue(RS_6_s_a_bloom_filter))) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 @@ -417,28 +433,32 @@ STAGE PLANS: TableScan alias: s Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: b (type: int) Select Operator - expressions: a (type: int) - outputColumnNames: _col0 + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -448,15 +468,15 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: int), _col6 (type: int) + expressions: _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -583,12 +603,12 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 @@ -600,9 +620,9 @@ STAGE PLANS: Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 3 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -631,14 +651,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t + filterExpr: a is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) + Select Operator + expressions: a (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 9 @@ -668,74 +696,78 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 a (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 > 8) and (_col0 = _col5)) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col0 + Select Operator + expressions: _col3 (type: struct), _col1 (type: int), _col2 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col2 = _col3) and (_col3 > 8)) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 <= 8) and (_col0 = _col5)) (type: boolean) - 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Select Operator + expressions: _col3 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col4 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: ((_col2 = _col3) and (_col3 <= 8)) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col4 (type: struct) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: _col0 (type: struct), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1 + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col2 = _col3) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 4 Execution mode: vectorized, 
llap Reduce Operator Tree: @@ -753,46 +785,6 @@ STAGE PLANS: name: default.acidtbl Write Type: DELETE Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acidtbl - Write Type: UPDATE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 7 Execution mode: llap Reduce Operator Tree: Select Operator @@ -823,7 +815,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 8 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -838,6 +830,46 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Union 2 Vertex: Union 2 @@ -859,7 +891,7 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false @@ -868,44 +900,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl - Write Type: UPDATE + Write Type: INSERT Stage: Stage-7 Stats Work Basic Stats Work: - Stage: Stage-3 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl - Stage: Stage-1 + Stage: Stage-3 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acidtbl - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-9 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b - Column Types: int, int - Table: default.acidtbl PREHOOK: query: drop database if exists type2_scd_helper cascade PREHOOK: type: DROPDATABASE diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index 57dbbe34c3..ba0b22232a 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -402,20 +402,20 @@ STAGE PLANS: TableScan alias: acid_part_sdpo filterExpr: ((key = 'foo') and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1601 Data size: 280048 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -424,10 +424,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -505,20 +505,20 @@ STAGE PLANS: TableScan alias: acid_part_sdpo filterExpr: ((key = 'foo') and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1601 Data size: 280048 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -527,10 +527,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -841,7 +841,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 10 Data size: 3480 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator @@ -1172,20 +1172,20 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr = 11)) (type: boolean) - Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1601 
Data size: 280048 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1194,10 +1194,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1280,10 +1280,10 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr >= 11)) (type: boolean) - Statistics: Num rows: 3201 Data size: 313458 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3201 Data size: 572532 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), hr (type: int) outputColumnNames: _col0, _col4 @@ -1426,10 +1426,10 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo filterExpr: (value = 'bar') (type: boolean) - Statistics: Num rows: 4200 Data size: 1247277 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4200 Data size: 1523944 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) - Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 @@ -1631,33 +1631,32 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo_no_cp filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr = 11)) (type: boolean) - Statistics: Num rows: 1601 Data size: 599036 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1601 Data size: 280048 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Select Operator - 
expressions: ROW__ID (type: struct), key (type: string), ds (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL + expressions: ROW__ID (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct) - null sort order: aaaa - sort order: ++++ - Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: string), 'bar' (type: string) + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY._col0 (type: struct), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 874 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1740,21 +1739,21 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo_no_cp filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr >= 11)) (type: boolean) - Statistics: Num rows: 3201 Data size: 1197516 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3201 Data size: 572532 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: ROW__ID (type: struct), key (type: string), ds (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL + expressions: ROW__ID (type: struct), hr (type: int) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct) + key expressions: '2008-04-08' (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct) null sort order: aaaa sort order: ++++ - Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: string), 'bar' (type: string) + Map-reduce partition 
columns: '2008-04-08' (type: string), _col4 (type: int) + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: 'foo' (type: string), 'bar' (type: string) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1766,7 +1765,7 @@ STAGE PLANS: File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 74d0190dfe..e96363fe29 100644 --- ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3393,21 +3393,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n1 - filterExpr: ((de) IN (109.23, 119.23) and enforce_constraint(vc is not null)) (type: boolean) + filterExpr: (de) IN (109.23, 119.23) (type: boolean) Statistics: Num rows: 1002 Data size: 225450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((de) IN (109.23, 119.23) and enforce_constraint(vc is not null)) (type: boolean) - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (de) IN (109.23, 119.23) (type: boolean) + Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1806 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1806 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -3415,18 +3415,25 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14 (type: decimal(3,2)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Statistics: Num rows: 6 Data size: 1806 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint((_col2 is not null and _col3 is not null)) (type: boolean) Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_uami_n1 - Write Type: UPDATE + Select 
Operator + expressions: _col0 (type: struct), _col1 (type: int), CAST( _col2 AS decimal(5,2)) (type: decimal(5,2)), _col3 (type: varchar(128)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_uami_n1 + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -3492,21 +3499,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n1 - filterExpr: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: boolean) + filterExpr: (de = 3.14) (type: boolean) Statistics: Num rows: 1002 Data size: 225450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: boolean) - Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (de = 3.14) (type: boolean) + Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -3514,18 +3521,25 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14159 (type: decimal(6,5)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint((_col1 is not null and _col3 is not null)) (type: boolean) Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_uami_n1 - Write Type: UPDATE + Select Operator + expressions: _col0 (type: struct), _col1 (type: int), CAST( _col2 AS decimal(5,2)) (type: decimal(5,2)), _col3 (type: varchar(128)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 301 Basic 
stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_uami_n1 + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -4339,10 +4353,10 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 - Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 Stage-1 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -4352,100 +4366,115 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string), ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 7 Map Operator Tree: TableScan alias: s Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), a1 (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: a1 (type: string), value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column 
stats: NONE - Filter Operator - predicate: ((_col6 < 5) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 3) and (_col6 >= 5) and enforce_constraint(_col0 is not null) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: string) - Filter Operator - predicate: (enforce_constraint(_col6 is not null) and _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col4 (type: string), _col5 (type: struct), _col2 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint(_col0 is not null) (type: boolean) + 
Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 3) and (_col1 >= 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col5 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: string) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -4453,31 +4482,15 @@ STAGE PLANS: name: default.masking_test_n4 Write Type: DELETE Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.masking_test_n4 - Write Type: UPDATE - Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -4487,7 +4500,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: key, a1, value - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column 
stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') minReductionHashAggr: 0.99 @@ -4499,7 +4512,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4514,6 +4527,25 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint(_col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.masking_test_n4 + Write Type: UPDATE Stage: Stage-4 Dependency Collection @@ -4533,7 +4565,7 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false @@ -4542,13 +4574,13 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.masking_test_n4 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-6 Stats Work Basic Stats Work: - Stage: Stage-1 + Stage: Stage-2 Move Operator tables: replace: false @@ -4557,7 +4589,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.masking_test_n4 - Write Type: INSERT + Write Type: UPDATE Stage: Stage-7 Stats Work @@ -4594,12 +4626,12 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 @@ -4609,122 +4641,137 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Reducer 2 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - 
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string), ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 8 Map Operator Tree: TableScan alias: s Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), a1 (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: a1 (type: string), value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 8 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 5) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 3) and (_col6 >= 5) and enforce_constraint(_col0 is not null) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: string) - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: 
Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col4 (type: string), _col5 (type: struct), _col2 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: (enforce_constraint(_col6 is not null) and _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint(_col0 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 3) and (_col1 >= 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col5 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: 
Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: string) + Filter Operator + predicate: (_col5 = _col1) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col3 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -4732,91 +4779,94 @@ STAGE PLANS: name: default.masking_test_n4 Write Type: DELETE Reducer 4 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.masking_test_n4 - Write Type: UPDATE + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, a1, value + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 
Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint(_col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.masking_test_n4 + Write Type: UPDATE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cardinality_violation(_col0) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input 
format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.masking_test_n4 - Write Type: INSERT - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, a1, value - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -4836,7 +4886,7 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false @@ -4845,44 +4895,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.masking_test_n4 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-7 Stats Work Basic Stats Work: - Stage: Stage-3 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.masking_test_n4 + Write Type: UPDATE Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, a1, value + Column Types: int, string, string + Table: default.masking_test_n4 - Stage: Stage-1 + Stage: Stage-3 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.masking_test_n4 - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-9 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: key, a1, value - Column Types: int, string, string - Table: default.masking_test_n4 PREHOOK: query: explain MERGE INTO masking_test_n4 as t using nonacid_n2 as s ON t.key = s.key WHEN MATCHED AND 
s.key < 5 THEN DELETE @@ -4907,10 +4957,10 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 - Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 Stage-1 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -4920,107 +4970,122 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 7 Map Operator Tree: TableScan alias: s Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), a1 (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: a1 (type: string), value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 5) and (_col0 = _col6)) (type: boolean) + Map 7 + Map Operator Tree: + TableScan + alias: t Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col0 + expressions: key (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: struct) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group 
By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col4 (type: struct), _col2 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col4 = _col1) and (_col1 < 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: (enforce_constraint(_col6 is not null) and _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col4 is null (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint(_col0 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Filter Operator + predicate: (_col4 = _col1) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -5028,39 +5093,15 @@ STAGE PLANS: name: default.masking_test_n4 Write Type: DELETE Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -5070,7 +5111,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: key, a1, value - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') minReductionHashAggr: 0.99 @@ -5082,7 +5123,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), 
_col2 (type: struct) - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5097,6 +5138,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection @@ -5116,20 +5181,6 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-2 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - - Stage: Stage-6 - Stats Work - Basic Stats Work: - Stage: Stage-1 Move Operator tables: @@ -5141,7 +5192,7 @@ STAGE PLANS: name: default.masking_test_n4 Write Type: INSERT - Stage: Stage-7 + Stage: Stage-6 Stats Work Basic Stats Work: Column Stats Desc: @@ -5149,6 +5200,20 @@ STAGE PLANS: Column Types: int, string, string Table: default.masking_test_n4 + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-7 + Stats Work + Basic Stats Work: + PREHOOK: query: explain MERGE INTO masking_test_n4 as t using nonacid_n2 as s ON t.key = s.key WHEN MATCHED AND s.key < 3 THEN UPDATE set a1 = '1' WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.a1, s.value) @@ -5170,12 +5235,12 @@ POSTHOOK: Output: default@merge_tmp_table STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -5184,149 +5249,124 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map 
Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string), ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 7 Map Operator Tree: TableScan alias: s Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), a1 (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: a1 (type: string), value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 3) and enforce_constraint(_col0 is not null) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: string) - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - 
minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col4 (type: string), _col5 (type: struct), _col2 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint(_col0 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Filter Operator + predicate: ((_col5 = _col1) and (_col1 < 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col5 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: (enforce_constraint(_col6 is not null) and _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: string) + Filter Operator + predicate: (_col5 = _col1) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col3 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.masking_test_n4 - Write Type: UPDATE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -5336,7 +5376,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: key, a1, value - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') minReductionHashAggr: 0.99 @@ -5348,7 +5388,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 6 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5363,11 +5403,54 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), '1' (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint(_col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.masking_test_n4 + Write Type: UPDATE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: false @@ -5376,44 +5459,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.masking_test_n4 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.masking_test_n4 + Write Type: UPDATE Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, a1, value + Column Types: int, string, string + Table: default.masking_test_n4 - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.masking_test_n4 - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.merge_tmp_table Stage: Stage-7 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: key, a1, value - Column Types: int, string, string - Table: default.masking_test_n4 PREHOOK: query: explain MERGE INTO masking_test_n4 as t using nonacid_n2 as s ON t.key = s.key WHEN MATCHED AND s.key < 5 THEN DELETE @@ -5451,18 +5534,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: key (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) + value expressions: _col1 (type: struct) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 @@ -5474,12 +5557,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -5489,44 +5576,48 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col6 < 5) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = _col6) (type: boolean) + Select Operator + expressions: _col2 (type: int), _col1 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 + Filter Operator + predicate: ((_col2 = _col0) and (_col0 < 5)) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Select 
Operator + expressions: _col1 (type: struct) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Filter Operator + predicate: (_col2 = _col0) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index b82a055079..49d77eccfd 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -271,7 +271,7 @@ POSTHOOK: query: drop table src_orc_merge_test_part_n1 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@src_orc_merge_test_part_n1 POSTHOOK: Output: default@src_orc_merge_test_part_n1 -Warning: Shuffle Join MERGEJOIN[18][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( select src1.key as k1, src1.value as v1, @@ -296,7 +296,7 @@ select src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Plan not optimized by CBO because the statement has sort by +Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) @@ -313,25 +313,27 @@ Stage-0 <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_12] Group By Operator [GBY_11] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(hash(_col0,_col1,_col2,_col3))"] - Merge Join Operator [MERGEJOIN_18] (rows=27556 width=356) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 1 [XPROD_EDGE] llap - XPROD_EDGE [RS_6] - Select Operator [SEL_2] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=166 width=178) - predicate:(key < 10) - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 4 [XPROD_EDGE] llap - XPROD_EDGE [RS_7] - Select Operator [SEL_5] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=166 width=178) - predicate:(key < 10) - TableScan [TS_3] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Output:["_col0"],aggregations:["sum(_col0)"] + Select Operator [SEL_9] (rows=27556 width=356) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_18] (rows=27556 width=356) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 1 [XPROD_EDGE] llap + XPROD_EDGE [RS_6] + Select Operator [SEL_2] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_16] (rows=166 width=178) + predicate:(UDFToDouble(key) < 10.0D) + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [XPROD_EDGE] llap + XPROD_EDGE [RS_7] + Select Operator [SEL_5] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_17] (rows=166 width=178) + predicate:(UDFToDouble(key) < 10.0D) + TableScan [TS_3] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key PREHOOK: type: QUERY @@ -3716,7 +3718,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_CTAS1_n1 -Plan not optimized by CBO because the statement has sort by +Plan not optimized by CBO because the statement has sort by with limit Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -3787,7 +3789,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_ctas3_n1 -Plan not optimized by CBO because the statement has sort by +Plan not optimized by CBO because the statement has sort by with limit Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -3896,7 +3898,7 @@ select src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Plan not optimized by CBO because the statement has sort by +Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) @@ -3919,7 +3921,7 @@ Stage-0 Select Operator [SEL_2] (rows=166 width=178) Output:["_col0","_col1"] Filter Operator [FIL_13] (rows=166 width=178) - predicate:(key < 10) + predicate:(UDFToDouble(key) < 10.0D) TableScan [TS_0] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 4 [CUSTOM_SIMPLE_EDGE] llap @@ -3927,7 +3929,7 @@ Stage-0 Select Operator [SEL_5] (rows=166 width=178) Output:["_col0","_col1"] Filter Operator [FIL_14] (rows=166 width=178) - predicate:(key < 10) + predicate:(UDFToDouble(key) < 10.0D) TableScan [TS_3] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out index a8a5e38602..082b244a49 100644 --- ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out +++ ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out @@ -1,3 +1,4 @@ +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: explain select t2.* from @@ -42,13 +43,11 @@ STAGE PLANS: predicate: ((value = 'val_105') and (key = '105')) (type: boolean) Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: '105' (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: '105' (type: string) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 2 @@ -66,18 +65,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 '105' (type: string) - 1 '105' (type: string) + 0 + 1 input vertices: 0 Map 1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: '105' (type: string), 'val_105' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -91,6 +90,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Map 2' is a cross product PREHOOK: query: select t2.* from (select key,value from (select key,value from src) t1 sort by key) t2 diff --git ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out index bdf1a65cb1..b7355fb2d2 100644 --- ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out +++ ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out @@ -1957,10 +1957,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: insert_into1_n0 - filterExpr: (value = 1) (type: boolean) + filterExpr: (UDFToDouble(value) = 1.0D) (type: boolean) Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (value = 1) (type: boolean) + predicate: (UDFToDouble(value) = 1.0D) (type: boolean) Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), value (type: string), i (type: int) @@ -2070,10 +2070,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: insert_into1_n0 - filterExpr: (value = 1) (type: boolean) + filterExpr: (UDFToDouble(value) = 1.0D) (type: boolean) Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (value = 1) (type: boolean) + predicate: (UDFToDouble(value) = 1.0D) (type: boolean) Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), i (type: int) @@ -2653,16 +2653,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - filterExpr: enforce_constraint(key is not null) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: enforce_constraint(key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: key (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: key (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -2670,17 +2670,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - filterExpr: enforce_constraint(key is not null) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: enforce_constraint(key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: key (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2690,29 +2690,36 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - 
outputColumnNames: _col0, _col6 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col6 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: null (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: null (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + expressions: _col1 (type: int), 'a1' (type: string), null (type: void) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint(_col0 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), 'a1' (type: string), null (type: string) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2725,7 +2732,7 @@ STAGE PLANS: name: default.acidtable Write Type: INSERT Select Operator - expressions: _col0 (type: int), 'a1' (type: string), null (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2846,12 +2853,12 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 @@ -2861,112 +2868,131 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Reducer 2 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t 
- Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string), ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 8 Map Operator Tree: TableScan alias: s Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), a1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: a1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map 8 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 2 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col6 < 3) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col3 (type: string), _col4 (type: struct), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col4 = _col1) and (_col1 < 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - Filter 
Operator - predicate: ((_col6 > 3) and (_col6 >= 3) and enforce_constraint(_col0 is not null) and (_col0 = _col6)) (type: boolean) - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col3 (type: string) - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col4 is null (type: boolean) + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), null (type: void) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint(_col0 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string) + Filter Operator + predicate: ((_col4 = _col1) and (_col1 > 3) and (_col1 >= 3)) (type: boolean) Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col4 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: string) + Filter Operator + predicate: (_col4 = _col1) (type: boolean) + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col3 + Statistics: Num 
rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col3 (type: struct) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: (enforce_constraint(_col6 is not null) and _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: null (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: null (type: string) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2984,50 +3010,10 @@ STAGE PLANS: name: default.acidtable Write Type: DELETE Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 'a1' (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acidtable - Write Type: UPDATE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), null (type: string) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -3040,7 
+3026,7 @@ STAGE PLANS: name: default.acidtable Write Type: INSERT Select Operator - expressions: _col0 (type: int), _col1 (type: string), null (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3054,7 +3040,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 7 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3069,6 +3055,49 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 'a1' (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint(_col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: UPDATE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-5 Dependency Collection @@ -3088,7 +3117,7 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false @@ -3097,44 +3126,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtable - Write Type: UPDATE + Write Type: INSERT Stage: Stage-7 Stats Work Basic Stats Work: - Stage: Stage-3 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: UPDATE Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, a1, value + Column Types: int, string, string + Table: default.acidtable - Stage: Stage-1 + Stage: Stage-3 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acidtable - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-9 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: key, a1, value - Column Types: int, string, string - Table: default.acidtable PREHOOK: query: MERGE INTO acidTable as t using nonacid_n1 as s ON t.key = s.key WHEN MATCHED AND s.key < 3 THEN DELETE @@ -3219,13 +3248,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t + filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 @@ -3233,12 +3270,16 @@ STAGE PLANS: TableScan alias: s Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -3248,26 +3289,28 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: 404 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 404 (type: int) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce 
partition columns: 404 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: 404 (type: int) + expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3280,7 +3323,7 @@ STAGE PLANS: name: default.acidtable2 Write Type: INSERT Select Operator - expressions: 404 (type: int) + expressions: _col0 (type: int) outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/llap/join0.q.out ql/src/test/results/clientpositive/llap/join0.q.out index 0a81c5bca3..2abc85396e 100644 --- ql/src/test/results/clientpositive/llap/join0.q.out +++ ql/src/test/results/clientpositive/llap/join0.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -36,10 +36,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -56,10 +56,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -109,7 +109,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN FORMATTED SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -130,7 +130,7 @@ SELECT src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM (SELECT * FROM src WHERE src.key < 10) src1 diff --git ql/src/test/results/clientpositive/llap/llap_acid.q.out ql/src/test/results/clientpositive/llap/llap_acid.q.out index b8a4058f6c..8b6f49dba9 100644 --- ql/src/test/results/clientpositive/llap/llap_acid.q.out +++ ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -319,7 +319,7 @@ STAGE PLANS: vectorized: 
true rowBatchContext: dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:struct, VALUE._col1:float, VALUE._col2:double, VALUE._col3:smallint + dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:float, VALUE._col1:double, VALUE._col2:smallint partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint] Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out index b91c3fffa7..86136ace26 100644 --- ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out +++ ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out @@ -313,7 +313,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:struct, VALUE._col1:float, VALUE._col2:double, VALUE._col3:smallint + dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:float, VALUE._col1:double, VALUE._col2:smallint partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint] Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index 29deff9eca..18f70927d8 100644 --- ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -627,17 +627,17 @@ STAGE PLANS: alias: orc_pred Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (s is not null and (s like 'bob%') and (t <> -1) and (t <> -2) and (t <> -3) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -646,10 +646,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -695,20 +695,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - filterExpr: (s is not null and (s like 'bob%') and (t <> -1) and (t <> -2) and (t <> -3) and t BETWEEN 25 AND 30) (type: boolean) + filterExpr: 
(UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (s is not null and (s like 'bob%') and (t <> -1) and (t <> -2) and (t <> -3) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -717,10 +717,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index 3a287bcda8..da94fef4a6 100644 --- ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -561,17 +561,17 @@ STAGE PLANS: alias: tbl_pred Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (s is not null and (s like 'bob%') and (t <> -1) and (t <> -2) and (t <> -3) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 25 Data size: 2525 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs (cache only) Reducer 2 @@ -580,10 +580,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -629,20 +629,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - filterExpr: (s is not null and (s like 'bob%') and (t <> -1) and (t <> -2) and (t <> -3) and t BETWEEN 25 AND 30) (type: boolean) + filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (s is not null and (s like 'bob%') and (t <> -1) and (t <> -2) and (t <> -3) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs (cache only) Reducer 2 @@ -651,10 +651,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 2525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/runtime_stats_merge.q.out ql/src/test/results/clientpositive/llap/runtime_stats_merge.q.out index edcdfb4175..08c69b11c3 100644 --- ql/src/test/results/clientpositive/llap/runtime_stats_merge.q.out +++ ql/src/test/results/clientpositive/llap/runtime_stats_merge.q.out @@ 
-108,9 +108,9 @@ POSTHOOK: Output: default@lineitem2 POSTHOOK: Output: default@merge_tmp_table POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(lineitem2)lineitem2.FieldSchema(name:ROW__ID, type:struct, comment:), ] Vertex dependency in root stage -Map 2 <- Map 1 (BROADCAST_EDGE) -Reducer 3 <- Map 2 (SIMPLE_EDGE) -Reducer 4 <- Map 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Map 1 (SIMPLE_EDGE) Stage-4 Stats Work{} @@ -120,52 +120,56 @@ Stage-4 Stage-3 Dependency Collection{} Stage-2 - Reducer 3 vectorized, llap - File Output Operator [FS_54] + Reducer 2 vectorized, llap + File Output Operator [FS_55] table:{"name:":"default.lineitem2"} - Select Operator [SEL_53] (runtime: rows=1 width=76) + Select Operator [SEL_54] (runtime: rows=1 width=76) Output:["_col0"] - <-Map 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] PartitionCols:UDFToInteger(_col0) - Select Operator [SEL_9] (runtime: rows=1 width=76) + Select Operator [SEL_11] (runtime: rows=1 width=76) Output:["_col0"] - Filter Operator [FIL_25] (runtime: rows=1 width=84) - predicate:(_col4 = _col0) - Map Join Operator [MAPJOIN_41] (runtime: rows=1 width=84) - Conds:FIL_29.l_orderkey=RS_45._col0(Inner),Output:["_col0","_col3","_col4"] - <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_45] - PartitionCols:_col0 - Select Operator [SEL_44] (runtime: rows=1 width=4) - Output:["_col0"] - Filter Operator [FIL_43] (runtime: rows=1 width=4) + Filter Operator [FIL_10] (runtime: rows=1 width=84) + predicate:(_col1 = _col2) + Select Operator [SEL_9] (runtime: rows=1 width=84) + Output:["_col0","_col1","_col2"] + Map Join Operator [MAPJOIN_40] (runtime: rows=1 width=84) + Conds:SEL_2._col0=RS_44._col0(Inner),Output:["_col0","_col1","_col2"] + <-Map 4 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_44] + PartitionCols:_col0 + Select Operator [SEL_43] (runtime: rows=1 width=4) + Output:["_col0"] + Filter Operator [FIL_42] (runtime: rows=1 width=4) + predicate:l_orderkey is not null + TableScan [TS_3] (runtime: rows=1 width=4) + default@lineitem_stage,lineitem_stage, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey"] + <-Select Operator [SEL_2] (runtime: rows=3 width=80) + Output:["_col0","_col1"] + Filter Operator [FIL_27] (runtime: rows=3 width=4) predicate:l_orderkey is not null - TableScan [TS_0] (runtime: rows=1 width=4) - default@lineitem_stage,lineitem_stage, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey"] - <-Filter Operator [FIL_29] (runtime: rows=3 width=4) - predicate:l_orderkey is not null - TableScan [TS_2] (runtime: rows=3 width=4) - default@lineitem2,lineitem2, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey"] - Reducer 4 llap - File Output Operator [FS_22] + TableScan [TS_0] (runtime: rows=3 width=4) + default@lineitem2,lineitem2, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey"] + Reducer 3 llap + File Output Operator [FS_24] table:{"name:":"default.merge_tmp_table"} - Select Operator [SEL_21] (runtime: rows=0 width=-1) + Select Operator [SEL_23] (runtime: rows=0 width=-1) Output:["_col0"] - Filter Operator [FIL_26] (runtime: rows=0 width=-1) + Filter Operator [FIL_22] (runtime: rows=0 width=-1) predicate:(_col1 > 1L) - Group By Operator [GBY_19] (runtime: rows=1 width=84) + Group By Operator [GBY_21] (runtime: rows=1 width=84) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] 
PartitionCols:_col0 - Group By Operator [GBY_17] (runtime: rows=1 width=84) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col3 - Select Operator [SEL_16] (runtime: rows=1 width=84) - Output:["_col3"] - Filter Operator [FIL_27] (runtime: rows=1 width=84) - predicate:(_col4 = _col0) - Please refer to the previous Map Join Operator [MAPJOIN_41] + Group By Operator [GBY_19] (runtime: rows=1 width=84) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Select Operator [SEL_18] (runtime: rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_17] (runtime: rows=1 width=84) + predicate:(_col1 = _col2) + Please refer to the previous Select Operator [SEL_9] Stage-5 Stats Work{} Stage-1 diff --git ql/src/test/results/clientpositive/llap/semijoin.q.out ql/src/test/results/clientpositive/llap/semijoin.q.out index 46ff455ecc..cc5878937a 100644 --- ql/src/test/results/clientpositive/llap/semijoin.q.out +++ ql/src/test/results/clientpositive/llap/semijoin.q.out @@ -152,13 +152,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -195,7 +199,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE @@ -273,13 +277,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -316,7 +324,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE @@ -396,13 +404,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce 
partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -439,7 +451,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE @@ -506,18 +518,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: key is not null (type: boolean) + filterExpr: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), true (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), true (type: boolean) + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -530,20 +546,20 @@ STAGE PLANS: predicate: (key < 15) (type: boolean) Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col1 + expressions: key (type: int), key is not null (type: boolean) + outputColumnNames: _col0, _col1 Statistics: Num rows: 17 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col1 (type: int), _col1 (type: int) - minReductionHashAggr: 0.0 + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.35294116 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs @@ -554,8 +570,8 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 _col1 (type: int) + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col1 Statistics: Num rows: 11 Data size: 979 Basic 
stats: COMPLETE Column stats: COMPLETE Select Operator @@ -641,13 +657,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), true (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), true (type: boolean) + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -660,21 +680,21 @@ STAGE PLANS: predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.6666666 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -684,8 +704,8 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -751,6 +771,28 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: t3_n12 @@ -777,24 +819,6 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -802,29 +826,29 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -873,6 +897,28 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), true (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), true (type: boolean) + Statistics: Num rows: 6 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + 
Execution mode: llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: t2_n33 @@ -882,39 +928,21 @@ STAGE PLANS: predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -924,8 +952,8 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -992,6 +1020,28 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 2) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: t1_n55 @@ -1018,24 +1068,6 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1043,25 +1075,25 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1120,12 +1152,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -1162,7 +1198,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE @@ -1253,26 +1289,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value 
expressions: value (type: string) + Select Operator + expressions: value (type: string), key (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - filterExpr: (2 * key) is not null (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (2 * key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: (2 * key) (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1282,10 +1322,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (2 * _col0) (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs @@ -1296,8 +1336,8 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -1375,13 +1415,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -1393,13 +1437,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 6 @@ -1436,17 +1484,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col5 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1454,20 +1502,16 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int), _col3 (type: string) + value expressions: _col2 (type: int), _col3 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1547,12 +1591,16 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int), value (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -1589,7 +1637,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int), value (type: string) + 0 _col0 (type: int), _col1 (type: string) 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE @@ -1678,12 +1726,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic 
stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -1747,7 +1799,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE @@ -1850,26 +1902,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -1904,22 +1972,19 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col5 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 
Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1927,25 +1992,25 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2030,12 +2095,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -2047,12 +2116,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -2089,15 +2162,15 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col5 
(type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reducer 3 @@ -2107,7 +2180,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE @@ -2208,26 +2281,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -2262,22 +2351,19 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Full Outer Join 0 to 1 + Right Outer Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col5 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + value 
expressions: _col0 (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2285,25 +2371,25 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2391,12 +2477,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -2430,13 +2520,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2446,7 +2544,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE @@ -2464,7 +2562,7 @@ STAGE PLANS: Left Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator @@ -2569,12 +2667,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -2609,12 +2711,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2624,7 +2730,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE @@ -2642,7 +2748,7 @@ STAGE PLANS: Right Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 28 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -2749,12 +2855,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -2789,12 +2899,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2804,7 +2918,7 @@ STAGE PLANS: 
condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE @@ -2822,7 +2936,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -2940,13 +3054,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2980,13 +3098,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: value (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: value (type: string) + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2996,7 +3122,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE @@ -3015,7 +3141,7 @@ STAGE PLANS: Left Outer Join 0 to 1 keys: 0 _col1 (type: string) - 1 value (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator diff --git ql/src/test/results/clientpositive/llap/sort_acid.q.out ql/src/test/results/clientpositive/llap/sort_acid.q.out new file mode 100644 index 0000000000..ff0a5adddc --- /dev/null +++ ql/src/test/results/clientpositive/llap/sort_acid.q.out @@ -0,0 +1,149 @@ +PREHOOK: query: create table acidtlb(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acidtlb +POSTHOOK: query: create table acidtlb(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default
+POSTHOOK: Output: default@acidtlb
+PREHOOK: query: create table othertlb(c int, d int) stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@othertlb
+POSTHOOK: query: create table othertlb(c int, d int) stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@othertlb
+PREHOOK: query: insert into acidtlb values(10,200),(30,500)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@acidtlb
+POSTHOOK: query: insert into acidtlb values(10,200),(30,500)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@acidtlb
+POSTHOOK: Lineage: acidtlb.a SCRIPT []
+POSTHOOK: Lineage: acidtlb.b SCRIPT []
+PREHOOK: query: insert into othertlb values(10, 21),(30, 22),(60, 23),(70, 24),(80, 25)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@othertlb
+POSTHOOK: query: insert into othertlb values(10, 21),(30, 22),(60, 23),(70, 24),(80, 25)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@othertlb
+POSTHOOK: Lineage: othertlb.c SCRIPT []
+POSTHOOK: Lineage: othertlb.d SCRIPT []
+PREHOOK: query: explain cbo
+select a, 6 as c, b from acidtlb sort by a, c, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select a, 6 as c, b from acidtlb sort by a, c, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortExchange(distribution=[any], collation=[[0, 2]])
+  HiveProject(a=[$0], c=[6], b=[$1])
+    HiveTableScan(table=[[default, acidtlb]], table:alias=[acidtlb])
+
+PREHOOK: query: select a, 6 as c, b from acidtlb sort by a, c, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+#### A masked pattern was here ####
+POSTHOOK: query: select a, 6 as c, b from acidtlb sort by a, c, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+#### A masked pattern was here ####
+10 6 200
+30 6 500
+PREHOOK: query: update acidtlb set b=777
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+PREHOOK: Output: default@acidtlb
+POSTHOOK: query: update acidtlb set b=777
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+POSTHOOK: Output: default@acidtlb
+PREHOOK: query: select * from acidtlb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+#### A masked pattern was here ####
+POSTHOOK: query: select * from acidtlb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+#### A masked pattern was here ####
+10 777
+30 777
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: update acidtlb set b=350
+where a in (select a from acidtlb where a = 30)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+PREHOOK: Output: default@acidtlb
+POSTHOOK: query: update acidtlb set b=350
+where a in (select a from acidtlb where a = 30)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+POSTHOOK: Output: default@acidtlb
+PREHOOK: query: select * from acidtlb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+#### A masked pattern was here ####
+POSTHOOK: query: select * from acidtlb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+#### A masked pattern was here ####
+10 777
+30 350
+PREHOOK: query: update acidtlb set b=450
+where a in 
(select c from othertlb where c < 65) +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtlb +PREHOOK: Input: default@othertlb +PREHOOK: Output: default@acidtlb +POSTHOOK: query: update acidtlb set b=450 +where a in (select c from othertlb where c < 65) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtlb +POSTHOOK: Input: default@othertlb +POSTHOOK: Output: default@acidtlb +PREHOOK: query: select * from acidtlb +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtlb +#### A masked pattern was here #### +POSTHOOK: query: select * from acidtlb +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtlb +#### A masked pattern was here #### +10 450 +30 450 +PREHOOK: query: delete from acidtlb +where a in ( + select a from acidtlb a + join othertlb o on a.a = o.c + where o.d = 21) +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtlb +PREHOOK: Input: default@othertlb +PREHOOK: Output: default@acidtlb +POSTHOOK: query: delete from acidtlb +where a in ( + select a from acidtlb a + join othertlb o on a.a = o.c + where o.d = 21) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtlb +POSTHOOK: Input: default@othertlb +POSTHOOK: Output: default@acidtlb +PREHOOK: query: select * from acidtlb +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtlb +#### A masked pattern was here #### +POSTHOOK: query: select * from acidtlb +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtlb +#### A masked pattern was here #### +30 450 diff --git ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index ad21ef626d..8384a6c851 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -41,12 +41,12 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 @@ -56,122 +56,138 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Reducer 2 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 8 Map Operator Tree: TableScan alias: s Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: b (type: int) - Execution mode: vectorized, llap - 
LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 > 8) and (_col0 = _col5)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 <= 8) and (_col0 = _col5)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: struct), _col0 (type: int) + expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: struct) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Filter Operator - predicate: (_col0 = _col5) (type: boolean) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 8 + Map Operator Tree: + TableScan + alias: t + filterExpr: a is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col4 + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col4 (type: struct) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: a (type: int), ROW__ID (type: struct) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: struct) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: 
int) + value expressions: _col1 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col1 (type: int), _col2 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col2 = _col3) and (_col3 > 8)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: ((_col2 = _col3) and (_col3 <= 8)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col2 = _col3) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -179,55 +195,15 @@ STAGE PLANS: name: default.acidtbl_n0 Write Type: DELETE Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acidtbl_n0 - Write Type: UPDATE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -237,7 +213,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: a, b - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') minReductionHashAggr: 0.99 @@ -249,7 +225,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 7 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -264,6 +240,46 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl_n0 + Write Type: UPDATE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-5 Dependency Collection @@ -283,7 +299,7 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false @@ -292,44 +308,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl_n0 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-7 Stats Work Basic Stats Work: - Stage: Stage-3 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl_n0 + Write Type: UPDATE Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl_n0 - Stage: Stage-1 + Stage: Stage-3 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acidtbl_n0 - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-9 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b - Column Types: int, int - Table: default.acidtbl_n0 PREHOOK: query: explain merge into acidTbl_n0 as t using nonAcidOrcTbl_n0 s ON t.a = s.a WHEN NOT MATCHED THEN 
INSERT VALUES(s.a, s.b) @@ -363,13 +379,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t + filterExpr: a is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 @@ -377,13 +401,17 @@ STAGE PLANS: TableScan alias: s Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: b (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -393,15 +421,15 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: int), _col6 (type: int) + expressions: _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out index 9e31c64f83..b857f36211 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out @@ -129,12 +129,12 @@ POSTHOOK: Output: default@t STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -143,140 +143,116 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) 
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 7 Map Operator Tree: TableScan alias: u Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: b (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: t + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col4, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col4 + Select 
Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: struct), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col3 = _col0) (type: boolean) Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col4 (type: struct) - minReductionHashAggr: 0.0 - mode: hash + Select Operator + expressions: _col2 (type: struct), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col3 = _col0) (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 99 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - table: - input 
format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t - Write Type: UPDATE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator @@ -307,7 +283,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 6 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -322,11 +298,51 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 99 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t + Write Type: UPDATE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: false @@ -335,44 +351,44 @@ STAGE PLANS: 
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t - Write Type: UPDATE + Write Type: INSERT Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t + Write Type: UPDATE Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.t - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-7 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b - Column Types: int, int - Table: default.t PREHOOK: query: merge into t as t using upd_t as u ON t.a = u.a WHEN MATCHED THEN UPDATE SET b = 99 @@ -665,12 +681,12 @@ POSTHOOK: Output: default@t2 STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -679,23 +695,31 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: t + filterExpr: a is not null (type: boolean) Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c (type: int), ROW__ID (type: struct) + Select Operator + expressions: a (type: int), c (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct) Execution 
mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 @@ -703,13 +727,17 @@ STAGE PLANS: TableScan alias: u Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: b (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -719,150 +747,154 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col3 (type: int) - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 + Select Operator + expressions: _col3 (type: int), _col4 (type: int), _col2 (type: struct), _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col4 is null (type: boolean) Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int) + Filter Operator + predicate: (_col4 = _col0) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col4 (type: int), _col3 (type: int) + 
outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: int) + Filter Operator + predicate: (_col4 = _col0) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col6 (type: int), _col7 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 99 (type: int), VALUE._col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t2 - Write Type: UPDATE + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: a, b, c + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 
Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 99 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t2 - Write Type: INSERT - Select Operator - expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) - outputColumnNames: a, b, c - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Write Type: UPDATE Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2) + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: false @@ -871,44 +903,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t2 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b, c + Column Types: int, int, int + Table: default.t2 - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t2 - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-7 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b, c - Column Types: int, int, int - Table: default.t2 PREHOOK: query: merge into t2 as t using upd_t2_1 as u ON t.a = u.a WHEN MATCHED THEN UPDATE SET b = 99 @@ -953,12 +985,12 @@ POSTHOOK: Output: default@t2 STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 
- Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -967,23 +999,31 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: t + filterExpr: a is not null (type: boolean) Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c (type: int), ROW__ID (type: struct) + Select Operator + expressions: a (type: int), c (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 @@ -991,13 +1031,17 @@ STAGE PLANS: TableScan alias: u Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: b (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1007,104 +1051,68 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col3 (type: int) - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - 
Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 + Select Operator + expressions: _col3 (type: int), _col4 (type: int), _col2 (type: struct), _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col4 is null (type: boolean) Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int) + Filter Operator + predicate: (_col4 = _col0) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col4 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: int) + Filter Operator + predicate: (_col4 = _col0) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col6 (type: int), _col7 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 - Execution mode: 
vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 98 (type: int), VALUE._col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t2 - Write Type: UPDATE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), 1 (type: int) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1117,7 +1125,7 @@ STAGE PLANS: name: default.t2 Write Type: INSERT Select Operator - expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1131,7 +1139,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 6 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1146,11 +1154,51 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 98 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: false @@ -1159,44 +1207,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t2 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b, c + Column Types: int, int, int + Table: default.t2 - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t2 - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-7 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b, c - Column Types: int, int, int - Table: default.t2 PREHOOK: query: merge into t2 as t using upd_t2_2 as u ON t.a = u.a WHEN MATCHED THEN UPDATE SET b = 98 @@ -1241,12 +1289,12 @@ POSTHOOK: Output: default@t2 STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1255,23 +1303,31 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 
<- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: t + filterExpr: a is not null (type: boolean) Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c (type: int), ROW__ID (type: struct) + Select Operator + expressions: a (type: int), c (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 @@ -1279,13 +1335,17 @@ STAGE PLANS: TableScan alias: u Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: b (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1295,104 +1355,68 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 5 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col3 (type: int) - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 + Select Operator + expressions: _col3 (type: int), _col4 (type: int), _col2 (type: struct), _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, 
_col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col4 is null (type: boolean) Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int) + Filter Operator + predicate: (_col4 = _col0) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col4 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: int) + Filter Operator + predicate: (_col4 = _col0) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col6 (type: int), _col7 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 97 (type: int), VALUE._col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - File Output 
Operator - compressed: false - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t2 - Write Type: UPDATE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), 1 (type: int) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1405,7 +1429,7 @@ STAGE PLANS: name: default.t2 Write Type: INSERT Select Operator - expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1419,7 +1443,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 6 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1434,11 +1458,51 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 97 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic 
stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: false @@ -1447,44 +1511,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t2 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b, c + Column Types: int, int, int + Table: default.t2 - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t2 - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-7 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b, c - Column Types: int, int, int - Table: default.t2 PREHOOK: query: merge into t2 as t using upd_t2_3 as u ON t.a = u.a WHEN MATCHED THEN UPDATE SET b = 97 @@ -1529,12 +1593,12 @@ POSTHOOK: Output: default@t2 STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1543,23 +1607,31 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: t + filterExpr: a is not null (type: boolean) Statistics: Num rows: 7 Data size: 56 
Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c (type: int), ROW__ID (type: struct) + Select Operator + expressions: a (type: int), c (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 @@ -1567,13 +1639,17 @@ STAGE PLANS: TableScan alias: u Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: b (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1583,104 +1659,68 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col2, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 5 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col3 (type: int) - Filter Operator - predicate: (_col0 = _col6) (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: struct) - outputColumnNames: _col5 + Select Operator + expressions: _col3 (type: int), _col4 (type: int), _col2 (type: struct), _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col4 is null (type: boolean) Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col5 (type: struct) 
- minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int) + Filter Operator + predicate: (_col4 = _col0) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col4 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: int) + Filter Operator + predicate: (_col4 = _col0) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col6 (type: int), _col7 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 96 (type: int), VALUE._col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t2 - Write Type: 
UPDATE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), 1 (type: int) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1693,7 +1733,7 @@ STAGE PLANS: name: default.t2 Write Type: INSERT Select Operator - expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1707,7 +1747,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 6 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1722,11 +1762,51 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 96 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: false @@ -1735,44 +1815,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t2 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b, c + Column Types: int, int, int + Table: default.t2 - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t2 - Write Type: INSERT + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-7 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b, c - Column Types: int, int, int - Table: default.t2 PREHOOK: query: merge into t2 as t using upd_t2_4 as u ON t.a = u.a WHEN MATCHED THEN UPDATE SET b = 96 @@ -1871,10 +1951,10 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 - Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 Stage-1 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1884,36 +1964,48 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: ROW__ID (type: struct) + alias: u + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: int) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 Map Operator Tree: TableScan - alias: u - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: b (type: int) + alias: t + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1921,104 +2013,85 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col4, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: struct), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col3 = _col0) (type: boolean) Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col4 (type: struct) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: 
struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col6 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t3 - Write Type: DELETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 5 + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col3 = _col0) (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 88 
Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t3 + Write Type: DELETE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), 1 (type: int) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2031,7 +2104,7 @@ STAGE PLANS: name: default.t3 Write Type: INSERT Select Operator - expressions: _col0 (type: int), 1 (type: int) + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2045,7 +2118,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2060,6 +2133,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection @@ -2079,20 +2176,6 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-2 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - - Stage: Stage-6 - Stats Work - Basic Stats Work: - Stage: Stage-1 Move Operator tables: @@ -2104,7 +2187,7 @@ STAGE PLANS: name: default.t3 Write Type: INSERT - Stage: Stage-7 + Stage: Stage-6 Stats Work Basic Stats Work: Column Stats Desc: @@ -2112,6 +2195,20 @@ STAGE PLANS: Column Types: int, int Table: default.t3 + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-7 + Stats Work + Basic Stats Work: + PREHOOK: query: merge into t3 as t using upd_t3 as u ON t.a = u.a WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b) @@ -2200,12 +2297,12 @@ POSTHOOK: Output: default@t4 STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -2214,37 +2311,49 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: ROW__ID (type: struct) + alias: u + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 Map Operator Tree: TableScan - alias: u - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: a (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: b (type: int) + alias: t + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: a (type: int), ROW__ID (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 @@ -2252,105 +2361,70 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 a (type: int) - 1 a (type: int) - outputColumnNames: _col0, _col4, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: struct), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col3 = _col0) (type: boolean) Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col4 (type: struct) - minReductionHashAggr: 0.0 - mode: hash + Select Operator + expressions: _col2 (type: struct), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: struct) + Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 
1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col6 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col3 = _col0) (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col2 (type: struct) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t4 - Write Type: UPDATE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), 1 (type: int) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2363,7 +2437,7 @@ STAGE PLANS: name: default.t4 Write Type: INSERT Select Operator - expressions: _col0 (type: int), 1 (type: int) + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2377,7 +2451,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 6 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2392,11 +2466,51 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t4 + Write Type: UPDATE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-4 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: false @@ -2405,44 +2519,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t4 - Write Type: UPDATE + Write Type: INSERT Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t4 + Write Type: UPDATE Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.t4 - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: false table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.t4 - Write Type: INSERT + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table Stage: Stage-7 Stats Work Basic Stats Work: - Column Stats Desc: - Columns: a, b - Column Types: int, int - Table: default.t4 PREHOOK: query: merge into t4 as t using upd_t4 as u ON t.a = u.a WHEN MATCHED THEN UPDATE SET b = default diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 2fa252d942..ec4696ccf1 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -139,33 +139,42 @@ STAGE PLANS: nonOuterSmallTableKeyMapping: [] projectedOutput: 0:string, 1:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + selectExpressions: VectorUDFAdaptor(hash(_col1,_col2)) -> 3:int + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -297,33 +306,42 @@ STAGE PLANS: projectedOutput: 3:string, 4:string smallTableValueMapping: 4:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 + outputColumnNames: 
_col1, _col2 input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col1,_col2)) -> 5:int + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -345,31 +363,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string), value (type: string) 
+ outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -473,29 +499,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -544,33 
+578,42 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutput: 0:string, 1:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + selectExpressions: VectorUDFAdaptor(hash(_col1,_col2)) -> 3:int + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -769,19 +812,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column 
stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -869,8 +916,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -897,17 +944,78 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinInnerBigOnlyStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 0:string, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [4] + selectExpressions: VectorUDFAdaptor(hash(_col2,_col3)) -> 4:int + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -916,21 +1024,21 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 2 + scratchColumnTypeNames: [string, bigint] + Map 3 Map Operator Tree: TableScan alias: orcsrc_n0 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] @@ -940,78 +1048,26 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:string) predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - bigTableKeyColumns: 0:string - bigTableRetainColumnNums: [0, 1] - bigTableValueColumns: 0:string, 1:string - className: VectorMapJoinInnerBigOnlyStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - projectedOutput: 0:string, 0:string, 1:string - hashTableImplementationType: OPTIMIZED - 
outputColumnNames: _col0, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - bigTableKeyColumns: 0:string - bigTableRetainColumnNums: [0, 1] - bigTableValueColumns: 0:string, 1:string - className: VectorMapJoinInnerBigOnlyStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - projectedOutput: 0:string, 1:string - hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1020,21 +1076,21 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true + allNative: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan alias: orcsrc_n0 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE 
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] @@ -1044,15 +1100,15 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:string) predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z @@ -1063,7 +1119,9 @@ STAGE PLANS: keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1077,11 +1135,11 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1165,8 +1223,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1193,17 +1251,77 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 0:string, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4] + selectExpressions: VectorUDFAdaptor(hash(_col2,_col3)) -> 4:int + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1212,21 +1330,21 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 2 + scratchColumnTypeNames: [string, bigint] + Map 3 Map Operator Tree: TableScan alias: orcsrc_n0 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true 
vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] @@ -1236,77 +1354,26 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:string) predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - bigTableKeyColumns: 0:string - bigTableRetainColumnNums: [0, 1] - bigTableValueColumns: 0:string, 1:string - className: VectorMapJoinInnerBigOnlyStringOperator + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - projectedOutput: 0:string, 0:string, 1:string - hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - bigTableKeyColumns: 0:string - bigTableRetainColumnNums: [0, 1] - bigTableValueColumns: 0:string, 1:string - className: VectorMapJoinOuterStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutput: 0:string, 1:string - hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1315,42 +1382,52 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true + allNative: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan alias: orcsrc_n0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1364,11 +1441,11 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1489,7 +1566,7 @@ STAGE PLANS: projectedOutput: 0:string, 3:string, 4:string smallTableValueMapping: 4:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE @@ -1508,33 +1585,42 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutput: 3:string, 4:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 1 Map 4 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col1,_col2)) -> 5:int + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1556,31 +1642,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1602,29 +1696,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 
500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1735,52 +1837,60 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - bigTableKeyColumns: 0:string - bigTableRetainColumnNums: [0] - bigTableValueColumns: 0:string - className: VectorMapJoinOuterStringOperator + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outerSmallTableKeyMapping: 0 -> 3 - projectedOutput: 0:string, 3:string, 4:string - smallTableValueMapping: 4:string - hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: 
string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 3:string, 4:string + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 0:string, 3:string, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 2 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:string, 4:string + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1802,31 +1912,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + 
outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1875,33 +1993,42 @@ STAGE PLANS: projectedOutput: 3:string, 4:string smallTableValueMapping: 3:string, 4:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col1,_col2)) -> 5:int + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2012,29 +2139,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2056,51 +2191,59 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) 
- Map Join Vectorization: - bigTableKeyColumns: 0:string - bigTableRetainColumnNums: [0, 1] - bigTableValueColumns: 0:string, 1:string - className: VectorMapJoinOuterStringOperator + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outerSmallTableKeyMapping: 0 -> 3 - projectedOutput: 3:string, 0:string, 1:string - hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 3:string + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string + className: VectorMapJoinInnerBigOnlyStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:string, 1:string + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 0:string, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string, 1:string + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2117,7 +2260,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [string] + 
scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -2149,33 +2292,42 @@ STAGE PLANS: projectedOutput: 3:string, 4:string smallTableValueMapping: 3:string, 4:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 2 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col1,_col2)) -> 5:int + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2392,7 +2544,7 @@ STAGE PLANS: nonOuterSmallTableKeyMapping: [] projectedOutput: 0:string, 0:string, 1:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE @@ -2408,7 +2560,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:string, 1:string Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) + value expressions: _col1 (type: 
string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2435,19 +2587,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -2650,7 +2806,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) @@ -2658,7 +2814,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -2671,19 +2827,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -2869,29 +3029,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2918,7 +3086,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -2926,21 +3094,25 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: - 1 Map 5 - Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + 1 Map 5 + Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin 
Supported IS false @@ -3066,7 +3238,7 @@ STAGE PLANS: projectedOutput: 0:string, 3:string, 4:string smallTableValueMapping: 4:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 4 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE @@ -3082,7 +3254,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 3:string, 4:string Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3104,31 +3276,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3199,19 +3379,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + 
outputColumnNames: _col1, _col2 Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -3299,38 +3483,69 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has 
keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 0:string, 3:string, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 2 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:string, 4:string + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3347,36 +3562,44 @@ STAGE PLANS: includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 3 + scratchColumnTypeNames: [string, string] + Map 2 Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3394,7 +3617,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -3425,33 +3648,42 @@ STAGE PLANS: projectedOutput: 3:string, 4:string smallTableValueMapping: 3:string, 4:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: - 0 Reducer 2 - Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - minReductionHashAggr: 0.99 - mode: hash + 0 Map 1 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col1,_col2)) -> 5:int + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3469,28 +3701,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [string, string, bigint] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 1000 Data 
size: 265000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 5 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -3587,25 +3798,32 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -3674,7 +3892,7 @@ STAGE PLANS: outerSmallTableKeyMapping: 0 -> 3 projectedOutput: 3:string, 0:string, 1:string hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE @@ -3690,7 +3908,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:string, 1:string Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized, 
llap LLAP IO: all inputs Map Vectorization: @@ -3717,19 +3935,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 951f824c30..27f261765c 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -166,21 +166,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -285,21 +289,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + 
condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -406,21 +414,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 3 @@ -514,30 +526,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: key is not null (type: boolean) + filterExpr: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + outputColumnNames: _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -550,20 +566,20 @@ STAGE PLANS: predicate: (key < 15) (type: 
boolean) Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col1 + expressions: key (type: int), key is not null (type: boolean) + outputColumnNames: _col0, _col1 Statistics: Num rows: 17 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col1 (type: int), _col1 (type: int) - minReductionHashAggr: 0.0 + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.35294116 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs @@ -647,21 +663,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -674,21 +694,21 @@ STAGE PLANS: predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.6666666 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: int), _col1 (type: boolean) + null 
sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -750,11 +770,45 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: t3_n35 @@ -781,46 +835,16 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -870,11 +894,45 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: t2_n87 @@ -884,54 +942,24 @@ STAGE PLANS: predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - 
null sort order: z - sort order: + - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -987,11 +1015,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 2) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: t1_n148 @@ -1018,42 +1076,16 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1118,21 +1150,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -1250,34 +1286,38 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Select Operator + expressions: value (type: string), key (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 Map Operator Tree: TableScan alias: b - filterExpr: (2 * key) is not null (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (2 * key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: (2 * key) (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1287,10 +1327,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (2 * _col0) (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 20 Basic stats: 
COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs @@ -1369,29 +1409,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) @@ -1410,13 +1450,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: all inputs Map 4 @@ -1531,21 +1575,25 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output 
Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -1660,16 +1708,10 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -1678,13 +1720,23 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -1823,48 +1875,61 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 
17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -1900,10 +1965,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1980,37 +2045,72 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map 
Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -2037,45 +2137,16 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2155,37 +2226,72 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + 
sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -2212,45 +2318,16 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2330,77 +2407,91 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 
(BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Left Semi Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.5454545 - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.5454545 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + LLAP IO: all inputs Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2503,31 +2594,35 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 4 - Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + + 1 Map 3 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -2561,13 +2656,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2673,22 +2776,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input 
vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -2723,12 +2830,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2739,7 +2850,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -2863,22 +2974,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -2913,12 +3028,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2929,7 +3048,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -3052,31 +3171,35 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + + 1 Map 3 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -3110,13 +3233,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: value (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: value (type: string) + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -3329,22 +3460,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: 
_col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -3449,22 +3584,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -3571,22 +3710,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 3 @@ -3680,31 +3823,35 @@ STAGE PLANS: Map Operator Tree: TableScan 
alias: a - filterExpr: key is not null (type: boolean) + filterExpr: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + outputColumnNames: _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -3717,20 +3864,20 @@ STAGE PLANS: predicate: (key < 15) (type: boolean) Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col1 + expressions: key (type: int), key is not null (type: boolean) + outputColumnNames: _col0, _col1 Statistics: Num rows: 17 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col1 (type: int), _col1 (type: int) - minReductionHashAggr: 0.0 + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.35294116 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs @@ -3814,22 +3961,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: 
zz - sort order: ++ + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -3842,21 +3993,21 @@ STAGE PLANS: predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.6666666 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -3918,16 +4069,51 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t3_n35 + alias: a filterExpr: (key > 5) (type: boolean) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + 
Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: t3_n35 + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 5) (type: boolean) Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE @@ -3949,47 +4135,16 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4039,11 +4194,46 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 89 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: t2_n87 @@ -4053,55 +4243,24 @@ STAGE PLANS: predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -4157,11 +4316,42 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 2) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 
Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: t1_n148 @@ -4188,43 +4378,16 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4289,22 +4452,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: 
COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -4422,35 +4589,39 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Select Operator + expressions: value (type: string), key (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 Map Operator Tree: TableScan alias: b - filterExpr: (2 * key) is not null (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (2 * key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: (2 * key) (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -4460,10 +4631,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (2 * _col0) (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs @@ -4542,32 +4713,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column 
stats: COMPLETE HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) null sort order: zz @@ -4585,13 +4756,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: all inputs Map 4 @@ -4706,22 +4881,26 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -4836,17 +5015,10 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -4855,14 +5027,25 @@ STAGE PLANS: 1 _col0 (type: 
int) outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -5001,50 +5184,63 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 
_col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -5080,10 +5276,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5160,37 +5356,74 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP 
IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -5217,46 +5450,16 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5336,37 +5539,74 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -5393,46 +5633,16 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5525,33 +5735,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 4 - Statistics: Num rows: 28 
Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -5585,13 +5799,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -5697,23 +5919,27 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -5748,12 +5974,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + 
null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -5764,7 +5994,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -5888,23 +6118,27 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -5939,12 +6173,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -5955,7 +6193,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -6024,33 +6262,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: - 0 _col1 (type: string) - 1 value (type: string) - 
outputColumnNames: _col0 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 3 @@ -6084,13 +6326,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: value (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: value (type: string) + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -6300,16 +6550,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6420,26 +6673,29 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false vectorized: true Map 3 Map Operator Tree: @@ -6542,16 +6798,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns 
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6656,19 +6915,22 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Select Vectorization: - className: VectorSelectOperator - native: true - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6690,13 +6952,14 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true + selectExpressions: IsNotNull(col 0:int) -> 3:boolean Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap @@ -6784,16 +7047,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6815,13 +7081,14 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true + selectExpressions: IsNotNull(col 1:string) -> 3:boolean Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap @@ -6891,8 +7158,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -6904,15 +7171,19 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS 
true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6924,24 +7195,23 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan Vectorization: native: true Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Select Vectorization: - className: VectorSelectOperator - native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap @@ -6955,7 +7225,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -7011,8 +7281,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -7024,15 +7294,19 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinOperator native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7044,24 +7318,24 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan Vectorization: native: true Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Select Vectorization: - className: VectorSelectOperator - native: true + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: IsNotNull(col 1:string) -> 3:boolean + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap @@ -7075,7 +7349,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -7128,8 +7402,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -7141,13 +7415,14 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator native: false - vectorProcessingMode: HASH + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
Execution mode: vectorized, llap @@ -7161,23 +7436,25 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan Vectorization: native: true Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7189,7 +7466,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -7257,16 +7534,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7390,16 +7670,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7421,6 +7704,7 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true + selectExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH @@ -7510,21 +7794,21 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true Map Join Vectorization: - bigTableKeyExpressions: col 2:int + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Select Vectorization: - className: VectorSelectOperator - native: true + Map Join Vectorization: + bigTableKeyExpressions: col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true @@ -7547,10 +7831,13 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7671,16 +7958,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7801,22 +8091,25 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, 
Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true Map Join Vectorization: bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7966,25 +8259,28 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Filter Vectorization: - className: VectorFilterOperator + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator native: true Map Join Vectorization: - bigTableKeyExpressions: col 1:int + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: 
VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8000,10 +8296,16 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8129,27 +8431,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8161,23 +8477,68 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE 
Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8186,10 +8547,10 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -8242,38 +8603,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -8289,13 +8618,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8375,27 +8704,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + 
outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8407,23 +8750,68 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 
_col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8432,10 +8820,10 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -8488,38 +8876,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -8535,13 +8891,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8589,22 +8945,25 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true Map Join Vectorization: bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN 
[tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8650,10 +9009,16 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8762,16 +9127,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8817,10 +9185,13 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8946,16 +9317,19 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9001,10 +9375,13 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9075,26 +9452,29 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true Map Join Vectorization: - bigTableKeyExpressions: col 1:string + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true + Map Join Vectorization: + bigTableKeyExpressions: col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] @@ -9136,10 +9516,16 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9360,32 +9746,39 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled 
IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9542,32 +9935,39 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9726,32 +10126,39 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9892,7 +10299,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: key is not null (type: boolean) + filterExpr: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -9900,41 +10307,48 @@ STAGE PLANS: Filter Vectorization: className: 
VectorFilterOperator native: true - predicate: key is not null (type: boolean) + predicate: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9961,11 +10375,12 @@ STAGE PLANS: predicate: (key < 15) (type: boolean) Statistics: Num rows: 17 Data 
size: 68 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col1 + expressions: key (type: int), key is not null (type: boolean) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true + selectExpressions: IsNotNull(col 0:int) -> 3:boolean Statistics: Num rows: 17 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: @@ -9973,18 +10388,18 @@ STAGE PLANS: groupByMode: HASH native: false vectorProcessingMode: HASH - keys: _col1 (type: int), _col1 (type: int) - minReductionHashAggr: 0.0 + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.35294116 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -10096,32 +10511,39 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinOperator + 
native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -10148,33 +10570,34 @@ STAGE PLANS: predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + selectExpressions: IsNotNull(col 1:string) -> 3:boolean + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.6666666 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -10257,16 +10680,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t3_n35 + alias: a 
filterExpr: (key > 5) (type: boolean) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -10274,35 +10697,47 @@ STAGE PLANS: className: VectorFilterOperator native: true predicate: (key > 5) (type: boolean) - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator native: false - vectorProcessingMode: HASH - keys: _col0 (type: int) - minReductionHashAggr: 0.5294118 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -10314,53 +10749,48 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan - 
alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + alias: t3_n35 + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + minReductionHashAggr: 0.5294118 + mode: hash outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: z sort order: + + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -10372,7 +10802,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -10387,13 +10817,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: 
false - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -10443,15 +10873,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t2_n87 - filterExpr: ((key > 5) and (value <= 'val_20')) (type: boolean) + alias: a + filterExpr: (key > 5) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -10459,36 +10889,48 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH + Statistics: Num rows: 6 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinOperator native: false - vectorProcessingMode: HASH - keys: _col0 (type: int), _col1 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + 
Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -10500,11 +10942,11 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan - alias: a - filterExpr: key is not null (type: boolean) + alias: t2_n87 + filterExpr: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -10512,41 +10954,37 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value is not null (type: boolean) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: IsNotNull(col 1:string) -> 3:boolean + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -10558,7 +10996,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -10626,11 +11064,69 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 2) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 Map Operator Tree: TableScan alias: t1_n148 @@ -10683,58 +11179,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic 
stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -10749,13 +11194,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -10825,32 +11270,39 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: 
int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11020,32 +11472,39 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Operator + expressions: value (type: string), key 
(type: int) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11061,7 +11520,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - filterExpr: (2 * key) is not null (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -11069,14 +11528,15 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: (2 * key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: (2 * key) (type: int) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + selectExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: @@ -11090,10 +11550,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (2 * _col0) (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -11202,47 +11662,47 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe 
IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 2:int + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true + input vertices: + 1 Map 4 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) null sort order: zz @@ -11278,17 +11738,24 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort 
order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11455,32 +11922,39 @@ STAGE PLANS: native: true predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11647,23 +12121,13 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -11678,18 +12142,35 @@ STAGE PLANS: nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11901,58 +12382,66 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) 
Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 1:int + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + 
outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11968,19 +12457,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12060,13 +12563,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12143,27 +12646,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 
(BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12175,23 +12692,70 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS 
true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12200,10 +12764,10 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -12256,39 +12820,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -12304,13 +12835,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator 
native: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12390,27 +12921,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12422,23 +12967,70 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12447,10 +13039,10 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -12503,39 +13095,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column 
stats: COMPLETE - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -12551,13 +13110,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12655,49 +13214,56 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0 input vertices: - 1 Map 4 - Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe 
for values IS true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12766,19 +13332,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12910,33 +13490,40 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - 
className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13008,16 +13595,23 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort 
order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13037,7 +13631,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -13181,33 +13775,40 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13279,16 +13880,23 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13308,7 +13916,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -13451,49 +14059,56 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: - 0 _col1 (type: string) - 1 value (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 1:string + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13562,19 +14177,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: value (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: value (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: value is not null (type: 
boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13866,29 +14495,36 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14045,29 +14681,36 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14226,29 +14869,36 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14389,7 +15039,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: key is not null (type: boolean) + filterExpr: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -14397,38 +15047,46 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: key is not null (type: boolean) + predicate: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col1 - input 
vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinLeftSemiMultiKeyOperator native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14455,11 +15113,12 @@ STAGE PLANS: predicate: (key < 15) (type: boolean) Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col1 + expressions: key (type: int), key is not null (type: boolean) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true + selectExpressions: IsNotNull(col 0:int) -> 3:boolean Statistics: Num rows: 17 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: @@ -14467,18 +15126,18 @@ STAGE PLANS: groupByMode: HASH native: false vectorProcessingMode: HASH - keys: _col1 (type: int), _col1 (type: int) - minReductionHashAggr: 0.0 + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.35294116 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null 
sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -14590,29 +15249,37 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinLeftSemiMultiKeyOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14639,33 +15306,34 @@ STAGE PLANS: predicate: ((value < 'val_10') 
and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + selectExpressions: IsNotNull(col 1:string) -> 3:boolean + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.6666666 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14748,11 +15416,73 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 
6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 Map Operator Tree: TableScan alias: t3_n35 @@ -14805,62 +15535,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -14875,13 +15550,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -14931,15 +15606,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t2_n87 - filterExpr: ((key > 5) and (value <= 'val_20')) (type: boolean) + alias: a + filterExpr: (key > 5) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -14947,36 +15622,46 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - keys: _col0 (type: int), _col1 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Statistics: Num rows: 6 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinLeftSemiMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator 
+ expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14985,14 +15670,14 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan - alias: a - filterExpr: key is not null (type: boolean) + alias: t2_n87 + filterExpr: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -15000,38 +15685,37 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value is not null (type: boolean) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + selectExpressions: IsNotNull(col 1:string) -> 3:boolean + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.5 + mode: 
hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -15040,10 +15724,10 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -15111,11 +15795,66 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 2) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: 
COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 Map Operator Tree: TableScan alias: t1_n148 @@ -15168,55 +15907,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -15231,13 +15922,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -15307,29 +15998,36 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: 
COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -15499,29 +16197,36 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + Select Operator + expressions: value (type: string), key (type: int) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output 
Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -15537,7 +16242,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - filterExpr: (2 * key) is not null (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -15545,14 +16250,15 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: (2 * key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: (2 * key) (type: int) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + selectExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: @@ -15566,10 +16272,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (2 * _col0) (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -15678,40 +16384,40 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Map Join Vectorization: - 
className: VectorMapJoinInnerLongOperator + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) @@ -15748,17 +16454,24 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) 
+ null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -15925,29 +16638,36 @@ STAGE PLANS: native: true predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - Map Join Vectorization: - className: VectorMapJoinLeftSemiMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + className: VectorMapJoinLeftSemiMultiKeyOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ 
-16114,20 +16834,13 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -16140,17 +16853,31 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16362,52 +17089,60 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Map Join Operator - condition map: - Left Outer Join 0 to 1 
- keys: - 0 key (type: int) - 1 key (type: int) - Map Join Vectorization: - className: VectorMapJoinOuterLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + predicate: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + 1 Map 3 + Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16423,19 +17158,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16515,13 +17264,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -16598,27 +17347,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) 
- Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16630,23 +17393,64 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: 
true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16658,7 +17462,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -16707,42 +17511,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -16758,13 +17530,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -16844,27 +17616,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 
4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16876,23 +17662,64 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16904,7 +17731,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -16957,38 +17784,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -17004,13 +17799,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17108,43 +17903,50 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinOuterLongOperator + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0 input vertices: - 1 Map 4 - Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + 1 Map 3 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17213,19 +18015,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17357,30 +18173,37 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + 
condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17452,16 +18275,23 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17481,7 +18311,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -17625,30 +18455,37 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key 
(type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17720,16 +18557,23 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17749,7 +18593,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -17892,43 +18736,50 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: - 0 _col1 (type: string) - 1 value (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinOuterStringOperator + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0 + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + 1 Map 3 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17997,19 +18848,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: value (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: value (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18298,30 +19163,37 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18478,30 +19350,37 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: 
true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18660,30 +19539,37 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18824,7 +19710,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: key is not null (type: boolean) + filterExpr: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -18832,39 +19718,47 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: key is not null (type: boolean) + predicate: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinLeftSemiMultiKeyOperator native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + HybridGraceHashJoin: true + Select 
Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18891,11 +19785,12 @@ STAGE PLANS: predicate: (key < 15) (type: boolean) Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col1 + expressions: key (type: int), key is not null (type: boolean) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true + selectExpressions: IsNotNull(col 0:int) -> 3:boolean Statistics: Num rows: 17 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: @@ -18903,18 +19798,18 @@ STAGE PLANS: groupByMode: HASH native: false vectorProcessingMode: HASH - keys: _col1 (type: int), _col1 (type: int) - minReductionHashAggr: 0.0 + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.35294116 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -19026,30 +19921,38 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: 
key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinLeftSemiMultiKeyOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -19076,33 +19979,34 @@ STAGE PLANS: predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + selectExpressions: IsNotNull(col 1:string) -> 3:boolean + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.6666666 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 
(type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -19185,16 +20089,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t3_n35 + alias: a filterExpr: (key > 5) (type: boolean) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -19202,35 +20106,45 @@ STAGE PLANS: className: VectorFilterOperator native: true predicate: (key > 5) (type: boolean) - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - keys: _col0 (type: int) - minReductionHashAggr: 0.5294118 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -19239,54 +20153,51 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + alias: t3_n35 + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + minReductionHashAggr: 0.5294118 + mode: hash outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: z sort order: + + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: 
VectorReduceSinkObjectHashOperator + className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -19295,10 +20206,10 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -19313,13 +20224,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -19369,15 +20280,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t2_n87 - filterExpr: ((key > 5) and (value <= 'val_20')) (type: boolean) + alias: a + filterExpr: (key > 5) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -19385,36 +20296,47 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - keys: _col0 (type: int), _col1 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Statistics: Num rows: 6 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi 
Join 0 to 1 + keys: + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + Map Join Vectorization: + bigTableKeyExpressions: ConstantVectorExpression(val 1) -> 3:boolean + className: VectorMapJoinLeftSemiMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -19423,14 +20345,14 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan - alias: a - filterExpr: key is not null (type: boolean) + alias: t2_n87 + filterExpr: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -19438,39 +20360,37 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value is not null (type: boolean) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key 
Types IS true - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + selectExpressions: IsNotNull(col 1:string) -> 3:boolean + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int), _col1 (type: boolean) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -19479,10 +20399,10 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -19550,11 +20470,67 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 2) (type: boolean) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No 
nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 Map Operator Tree: TableScan alias: t1_n148 @@ -19607,56 +20583,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -19671,13 +20598,13 @@ 
STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -19747,30 +20674,37 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -19940,30 +20874,37 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num 
rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Operator + expressions: value (type: string), key (type: int) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 1067 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -19979,7 +20920,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - filterExpr: (2 * key) is not null (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -19987,14 +20928,15 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicate: (2 * key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: (2 * key) (type: int) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + 
selectExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: @@ -20008,10 +20950,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (2 * _col0) (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -20120,43 +21062,43 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerLongOperator + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true Reduce Output Operator key 
expressions: _col0 (type: int), _col1 (type: string) null sort order: zz @@ -20192,17 +21134,24 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -20369,30 +21318,37 @@ STAGE PLANS: native: true predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - Map Join Vectorization: - className: VectorMapJoinLeftSemiMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + Map Join Vectorization: + className: VectorMapJoinLeftSemiMultiKeyOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -20559,21 +21515,13 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -20586,18 +21534,33 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -20809,54 +21772,62 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Map Join Vectorization: - className: VectorMapJoinOuterLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + predicate: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: 
VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -20872,19 +21843,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -20964,13 +21949,13 @@ STAGE PLANS: Select Vectorization: className: 
VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -21047,27 +22032,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -21079,23 +22078,66 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -21107,7 +22149,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -21160,39 +22202,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data 
size: 44 Basic stats: COMPLETE Column stats: COMPLETE - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -21208,13 +22217,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -21294,27 +22303,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -21326,23 +22349,66 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 2 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink 
Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -21354,7 +22420,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -21407,39 +22473,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col5 is not null (type: boolean) - Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 
(type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -21455,13 +22488,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -21559,45 +22592,52 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinOuterLongOperator + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0 input vertices: - 1 Map 4 - Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null 
sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -21666,19 +22706,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -21810,31 +22864,38 @@ STAGE PLANS: native: true predicate: key is 
not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -21906,16 +22967,23 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -21935,7 +23003,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -22079,31 +23147,38 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -22175,16 +23250,23 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -22204,7 +23286,7 @@ STAGE PLANS: Full Outer Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -22347,45 +23429,52 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - HybridGraceHashJoin: true + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 keys: - 0 _col1 (type: string) - 1 value (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinOuterStringOperator + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0 + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -22454,19 +23543,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Reduce Output Operator - key expressions: value (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: value (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_complex_values.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_complex_values.q.out index ec8b1d9bc8..42fd615d36 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_complex_values.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_complex_values.q.out @@ -111,45 +111,53 @@ STAGE PLANS: predicateExpression: FilterLongColEqualLongScalar(col 0:int, val 100) predicate: (ssn = 100) (type: boolean) Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 100 (type: int) - 1 100 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: ConstantVectorExpression(val 100) -> 5:int - bigTableValueExpressions: col 2:string, col 3:string, col 4:struct - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: Supports Value Types [STRUCT] IS false - outputColumnNames: _col2, _col3, _col6 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 1 Data size: 257 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col6 (type: struct), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 0, 1] - Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumns: 2:struct + Select Operator + expressions: city (type: string), email (type: string), ROW__ID (type: struct) + outputColumnNames: _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 4] + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 100 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: ConstantVectorExpression(val 100) -> 5:int + bigTableValueExpressions: col 2:string, col 3:string, col 
4:struct + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Supports Value Types [STRUCT] IS false + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 1 Data size: 257 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumns: 3:int - valueColumns: 0:string, 1:string + projectedOutputColumnNums: [2, 0, 1] Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: string), _col4 (type: string) + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 2:struct + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: 3:int + valueColumns: 0:string, 1:string + Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: string), _col4 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -194,24 +202,23 @@ STAGE PLANS: Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: ConstantVectorExpression(val 100) -> 5:int + keyExpressions: ConstantVectorExpression(val 1) -> 5:boolean native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: 100 (type: int) + keys: true (type: boolean) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: 100 (type: int) + key expressions: _col0 (type: boolean) null sort order: z sort order: + - Map-reduce partition columns: 100 (type: int) + Map-reduce partition columns: _col0 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumns: 1:int - keyExpressions: ConstantVectorExpression(val 100) -> 1:int + keyColumns: 0:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE @@ -244,12 +251,12 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:struct, VALUE._col1:string, 
VALUE._col2:string + dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string partitionColumnCount: 0 scratchColumnTypeNames: [bigint, string] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), 100 (type: int), 'updated name' (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + expressions: KEY.reducesinkkey0 (type: struct), 100 (type: int), 'updated name' (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator @@ -281,50 +288,52 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: KEY._col0:int + dataColumns: KEY._col0:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: MERGEPARTIAL - keyExpressions: ConstantVectorExpression(val 100) -> 1:int, ConstantVectorExpression(val 100) -> 2:int + keyExpressions: col 0:boolean native: false vectorProcessingMode: MERGE_PARTIAL projectedOutputColumnNums: [] - keys: 100 (type: int), 100 (type: int) + keys: KEY._col0 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + expressions: 100 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [] - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [1] + selectExpressions: ConstantVectorExpression(val 100) -> 1:int + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: ConstantVectorExpression(val 100) -> 2:int + keyExpressions: col 1:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: 100 (type: int) + keys: _col0 (type: int) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: 100 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: 100 (type: int) + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumns: 1:int - keyExpressions: ConstantVectorExpression(val 100) -> 1:int + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 94fa3cfb3a..e5b18dfe23 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -455,7 +455,7 @@ POSTHOOK: 
query: select sum(sum(c1)) over() from e011_01_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_n0 #### A masked pattern was here #### -_c0 +sum_window_0 16.00 PREHOOK: query: explain vectorization detail select sum(sum(c1)) over( @@ -692,7 +692,7 @@ POSTHOOK: query: select sum(sum(c1)) over( POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_n0 #### A masked pattern was here #### -_c0 +sum_window_0 1.00 3.00 5.00 @@ -1014,7 +1014,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_n0 POSTHOOK: Input: default@e011_03_n0 #### A masked pattern was here #### -_c0 +sum_window_0 1.00 3.00 5.00 @@ -1336,7 +1336,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_n0 POSTHOOK: Input: default@e011_03_n0 #### A masked pattern was here #### -_c0 +sum_window_0 1.00 3.00 5.00 @@ -1585,7 +1585,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_n0 POSTHOOK: Input: default@e011_03_n0 #### A masked pattern was here #### -_c0 +sum_window_0 NULL NULL NULL @@ -1805,7 +1805,7 @@ POSTHOOK: query: select sum(sum(c1)) over() from e011_01_small POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_small #### A masked pattern was here #### -_c0 +sum_window_0 16.00 PREHOOK: query: explain vectorization detail select sum(sum(c1)) over( @@ -2042,7 +2042,7 @@ POSTHOOK: query: select sum(sum(c1)) over( POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_small #### A masked pattern was here #### -_c0 +sum_window_0 1.00 3.00 5.00 @@ -2364,7 +2364,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_small POSTHOOK: Input: default@e011_03_small #### A masked pattern was here #### -_c0 +sum_window_0 1.00 3.00 5.00 @@ -2686,7 +2686,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_small POSTHOOK: Input: default@e011_03_small #### A masked pattern was here #### -_c0 +sum_window_0 1.00 3.00 5.00 @@ -2935,7 +2935,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_small POSTHOOK: Input: default@e011_03_small #### A masked pattern was here #### -_c0 +sum_window_0 NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index 8ce9b76619..6a973ef6d2 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -854,7 +854,7 @@ group by key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -_c0 _c1 _c2 +_c0 _c1 percent_rank_window_2 1 1 0.0 1 1 0.0 1 1 0.0 diff --git ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out index ef7ea4ecf9..4746c37a35 100644 --- ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out +++ ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out @@ -55,101 +55,109 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 - src2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src1 + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: src1 - filterExpr: ((key < 300) and (key < 10)) (type: 
boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 300) and (key < 10)) (type: boolean) - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - src2 + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: src2 - filterExpr: (key < 300) (type: boolean) + filterExpr: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 300) (type: boolean) + predicate: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - - Stage: Stage-1 - Map Reduce - Map Operator Tree: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: src3 - filterExpr: (key < 300) (type: boolean) + filterExpr: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 300) (type: boolean) + predicate: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 365 Data size: 65005 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 65005 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - null sort order: zzz - sort order: +++ - Statistics: Num rows: 365 Data size: 65005 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: 
string), _col5 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 1 to 2 + filter predicates: + 0 + 1 {(UDFToDouble(KEY.reducesinkkey0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 8 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + null sort order: zzz + sort order: +++ + Statistics: Num rows: 8 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 65005 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 365 Data size: 65005 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -226,81 +234,89 @@ STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:src1 Fetch Operator limit: -1 - src2 + $hdt$_1:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:src1 TableScan alias: src1 - filterExpr: ((key < 300) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 300) and (key < 10)) (type: boolean) - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - src2 + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1400 Basic 
stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $hdt$_1:src2 TableScan alias: src2 - filterExpr: (key < 300) (type: boolean) + filterExpr: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 300) (type: boolean) + predicate: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 300) (type: boolean) + filterExpr: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 300) (type: boolean) + predicate: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 2112 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 1 to 2 + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 2112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) null sort order: zzz sort order: +++ - Statistics: Num rows: 4 Data size: 2112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) Execution mode: vectorized Local Work: @@ -309,10 +325,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - 
Statistics: Num rows: 4 Data size: 2112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 2112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/mapjoin_test_outer.q.out index dbc87d5f8d..89868cdf2d 100644 --- ql/src/test/results/clientpositive/mapjoin_test_outer.q.out +++ ql/src/test/results/clientpositive/mapjoin_test_outer.q.out @@ -248,76 +248,104 @@ POSTHOOK: Input: default@dest_2_n0 POSTHOOK: Input: default@src1 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 - src2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src1 + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: src1 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - src2 + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 9 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - - Stage: Stage-1 - Map Reduce - Map Operator Tree: + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) TableScan alias: src3 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: 
string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 55 Data size: 9625 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 9625 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 55 Data size: 9625 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 21 Data size: 11046 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 21 Data size: 11046 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 9625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 11046 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 55 Data size: 9625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 11046 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1087,57 +1115,77 @@ STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - src1 + $hdt$_0:src1 Fetch Operator limit: -1 - src2 + $hdt$_1:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 + $hdt$_0:src1 TableScan alias: src1 + filterExpr: key is not null (type: 
boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - src2 + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $hdt$_1:src2 TableScan alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 9 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: src3 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 21 Data size: 11046 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 21 Data size: 11046 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 21 Data size: 11046 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 21 Data size: 11046 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/no_hooks.q.out 
ql/src/test/results/clientpositive/no_hooks.q.out index ffff0752fc..7583863800 100644 --- ql/src/test/results/clientpositive/no_hooks.q.out +++ ql/src/test/results/clientpositive/no_hooks.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product POSTHOOK: query: EXPLAIN SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY @@ -15,28 +15,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -44,18 +52,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -86,7 +90,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/parallel_join0.q.out ql/src/test/results/clientpositive/parallel_join0.q.out index cda0f3f0a5..9f2e1814bb 100644 --- ql/src/test/results/clientpositive/parallel_join0.q.out +++ ql/src/test/results/clientpositive/parallel_join0.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -30,10 +30,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -46,10 +46,10 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN FORMATTED SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -125,8 +125,8 @@ SELECT src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"cboInfo":"Plan not optimized by CBO because the statement has sort by","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","filterExpr:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_13","children":{"Select Operator":{"expressions:":"key (type: 
string), value (type: string)","columnExprMap:":{"_col0":"key","_col1":"value"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_2","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0","VALUE._col1":"_col1"},"null sort order:":"","sort order:":"","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_6"}}}}}}}},{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","filterExpr:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_3","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_14","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","columnExprMap:":{"_col0":"key","_col1":"value"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0","VALUE._col1":"_col1"},"null sort order:":"","sort order:":"","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_7"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"columnExprMap:":{"_col0":"0:VALUE._col0","_col1":"0:VALUE._col1","_col2":"1:VALUE._col0","_col3":"1:VALUE._col1"},"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"JOIN_8","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_15"}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"columns:":["_col0","_col1","_col2","_col3"],"OperatorId:":"TS_16","children":{"Reduce Output Operator":{"columnExprMap:":{"KEY.reducesinkkey0":"_col0","KEY.reducesinkkey1":"_col1","KEY.reducesinkkey2":"_col2","KEY.reducesinkkey3":"_col3"},"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","null sort order:":"zzzz","sort order:":"++++","Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"RS_10"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","columnExprMap:":{"_col0":"KEY.reducesinkkey0","_col1":"KEY.reducesinkkey1","_col2":"KEY.reducesinkkey2","_col3":"KEY.reducesinkkey3"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_11","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE 
Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_12"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}} -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"key\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"value\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"key\",\n \"ndv\": 316\n },\n {\n \"name\": \"value\",\n \"ndv\": 307\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": {\n \"name\": \"<\",\n \"kind\": \"LESS_THAN\",\n \"syntax\": \"BINARY\"\n },\n \"operands\": [\n {\n \"op\": {\n \"name\": \"CAST\",\n \"kind\": \"CAST\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"type\": {\n \"type\": \"DOUBLE\",\n \"nullable\": true\n }\n },\n {\n \"literal\": 10,\n \"type\": {\n \"type\": \"DOUBLE\",\n \"nullable\": false\n }\n }\n ]\n },\n \"rowCount\": 166.66666666666666\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"key\",\n \"value\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ],\n \"rowCount\": 166.66666666666666\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"literal\": true,\n \"type\": {\n \"type\": \"BOOLEAN\",\n \"nullable\": false\n }\n },\n \"joinType\": \"inner\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"2\",\n \"2\"\n ],\n \"rowCount\": 27777.777777777774\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"key\",\n \"value\",\n \"key0\",\n \"value0\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n }\n ],\n \"rowCount\": 27777.777777777774\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange\",\n \"distribution\": {\n \"type\": \"ANY\"\n },\n \"collation\": [\n {\n \"field\": 0,\n \"direction\": \"ASCENDING\",\n \"nulls\": 
\"LAST\"\n },\n {\n \"field\": 1,\n \"direction\": \"ASCENDING\",\n \"nulls\": \"LAST\"\n },\n {\n \"field\": 2,\n \"direction\": \"ASCENDING\",\n \"nulls\": \"LAST\"\n },\n {\n \"field\": 3,\n \"direction\": \"ASCENDING\",\n \"nulls\": \"LAST\"\n }\n ],\n \"rowCount\": 27777.777777777774\n }\n ]\n}","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","filterExpr:":"(UDFToDouble(key) < 10.0D) (type: boolean)","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Filter Operator":{"predicate:":"(UDFToDouble(key) < 10.0D) (type: boolean)","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_13","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","columnExprMap:":{"_col0":"key","_col1":"value"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_2","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0","VALUE._col1":"_col1"},"null sort order:":"","sort order:":"","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_6"}}}}}}}},{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","filterExpr:":"(UDFToDouble(key) < 10.0D) (type: boolean)","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_3","children":{"Filter Operator":{"predicate:":"(UDFToDouble(key) < 10.0D) (type: boolean)","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_14","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","columnExprMap:":{"_col0":"key","_col1":"value"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0","VALUE._col1":"_col1"},"null sort order:":"","sort order:":"","Statistics:":"Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_7"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"columnExprMap:":{"_col0":"0:VALUE._col0","_col1":"0:VALUE._col1","_col2":"1:VALUE._col0","_col3":"1:VALUE._col1"},"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"JOIN_8","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_15"}}}}}},"Stage-2":{"Map Reduce":{"Map Operator 
Tree:":[{"TableScan":{"columns:":["_col0","_col1","_col2","_col3"],"OperatorId:":"TS_16","children":{"Reduce Output Operator":{"columnExprMap:":{"KEY.reducesinkkey0":"_col0","KEY.reducesinkkey1":"_col1","KEY.reducesinkkey2":"_col2","KEY.reducesinkkey3":"_col3"},"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","null sort order:":"zzzz","sort order:":"++++","Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"RS_10"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","columnExprMap:":{"_col0":"KEY.reducesinkkey0","_col1":"KEY.reducesinkkey1","_col2":"KEY.reducesinkkey2","_col3":"KEY.reducesinkkey3"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_11","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_12"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}} +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM (SELECT * FROM src WHERE src.key < 10) src1 diff --git ql/src/test/results/clientpositive/pcs.q.out ql/src/test/results/clientpositive/pcs.q.out index 15c6c212fd..d10a0708a4 100644 --- ql/src/test/results/clientpositive/pcs.q.out +++ ql/src/test/results/clientpositive/pcs.q.out @@ -1171,68 +1171,60 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: PARTIAL + filterExpr: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: PARTIAL + predicate: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 2 Data size: 564 Basic stats: COMPLETE Column 
stats: PARTIAL - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) - null sort order: zzz - sort order: +++ - Statistics: Num rows: 2 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL - tag: -1 - auto parallelism: false + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + auto parallelism: false TableScan alias: y - filterExpr: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: PARTIAL + filterExpr: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: PARTIAL + predicate: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 2 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) - null sort order: zzz - sort order: +++ - Statistics: Num rows: 2 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL - tag: -1 - auto parallelism: false + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + auto parallelism: false Needs Tagging: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here 
#### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query51.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query51.q.out index a072421bad..e3184ecb05 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query51.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query51.q.out @@ -100,26 +100,24 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[>($4, $5)]) HiveProject(item_sk=[CASE(IS NOT NULL($3), $3, $0)], d_date=[CASE(IS NOT NULL($4), $4, $1)], web_sales=[$5], store_sales=[$2], max_window_0=[max($5) OVER (PARTITION BY CASE(IS NOT NULL($3), $3, $0) ORDER BY CASE(IS NOT NULL($4), $4, $1) NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], max_window_1=[max($2) OVER (PARTITION BY CASE(IS NOT NULL($3), $3, $0) ORDER BY CASE(IS NOT NULL($4), $4, $1) NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)]) HiveJoin(condition=[AND(=($3, $0), =($4, $1))], joinType=[full], algorithm=[none], cost=[not available]) - HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[$2]) - HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], window_col_0=[$2]) - HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) - HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[$2]) - HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], window_col_0=[$2]) - HiveProject(ws_item_sk=[$0], d_date=[$1], $f2=[$2]) - HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_sales_price=[$21]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)]) + HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)]) + HiveProject(ws_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query51.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query51.q.out index 3d81b6bcb5..fbb5b458eb 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query51.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query51.q.out @@ -100,26 +100,24 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[>($4, $5)]) HiveProject(item_sk=[CASE(IS NOT NULL($3), $3, $0)], d_date=[CASE(IS NOT NULL($4), $4, $1)], web_sales=[$5], store_sales=[$2], max_window_0=[max($5) OVER (PARTITION BY CASE(IS NOT NULL($3), $3, $0) ORDER BY CASE(IS NOT NULL($4), $4, $1) NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], max_window_1=[max($2) OVER (PARTITION BY CASE(IS NOT NULL($3), $3, $0) ORDER BY CASE(IS NOT NULL($4), $4, $1) NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)]) HiveJoin(condition=[AND(=($3, $0), =($4, $1))], joinType=[full], algorithm=[none], cost=[not available]) - HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[$2]) - HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], window_col_0=[$2]) - HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) - HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_sales_price=[$13]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[$2]) - HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], window_col_0=[$2]) - HiveProject(ws_item_sk=[$0], d_date=[$1], $f2=[$2]) - 
HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_sales_price=[$21]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)]) + HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_sales_price=[$13]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)]) + HiveProject(ws_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_sales_price=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out index 2ceca679e5..fdfb8b3998 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out @@ -114,75 +114,75 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_119] - Limit [LIM_118] (rows=100 width=636) + File Output Operator [FS_117] + Limit [LIM_116] (rows=100 width=636) Number of rows:100 - Select Operator [SEL_117] (rows=2095597606 width=636) + Select Operator [SEL_115] (rows=2095597606 width=636) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] - Select Operator [SEL_115] (rows=2095597606 width=636) + SHUFFLE [RS_114] + Select Operator [SEL_113] (rows=2095597606 width=636) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_114] (rows=2095597606 width=420) + Top N Key Operator [TNK_112] (rows=2095597606 width=420) keys:CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END, CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END,top n:100 - Filter Operator [FIL_113] (rows=2095597606 width=420) + Filter Operator [FIL_111] (rows=2095597606 width=420) predicate:(max_window_0 > max_window_1) - PTF Operator [PTF_112] (rows=6286792818 width=420) + PTF Operator [PTF_110] (rows=6286792818 width=420) Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END ASC NULLS LAST","partition by:":"CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END"}] - Select Operator [SEL_111] (rows=6286792818 width=420) + Select Operator [SEL_109] (rows=6286792818 width=420) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_41] PartitionCols:CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END - Merge Join Operator [MERGEJOIN_88] (rows=6286792818 width=420) - Conds:RS_40._col0, _col1=RS_41._col0, _col1(Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_86] (rows=6286792818 width=420) + Conds:RS_38._col0, _col1=RS_39._col0, _col1(Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_41] + SHUFFLE [RS_39] PartitionCols:_col0, _col1 - Select Operator [SEL_37] (rows=19832154 width=210) + Select Operator [SEL_36] (rows=19832154 width=210) Output:["_col0","_col1","_col2"] - PTF Operator [PTF_36] (rows=19832154 width=210) + PTF Operator [PTF_35] (rows=19832154 width=210) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}] - Group By Operator [GBY_32] (rows=19832154 width=210) + Group By Operator [GBY_31] (rows=19832154 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_31] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_30] (rows=143966864 width=210) + Group By Operator [GBY_29] (rows=143966864 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_87] (rows=143966864 width=209) - Conds:RS_109._col0=RS_93._col0(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_85] (rows=143966864 width=209) + Conds:RS_107._col0=RS_91._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_93] + SHUFFLE [RS_91] PartitionCols:_col0 - Select Operator [SEL_90] (rows=317 width=98) + Select Operator [SEL_88] (rows=317 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_89] (rows=317 width=102) + Filter Operator [FIL_87] (rows=317 width=102) predicate:d_month_seq BETWEEN 1212 AND 1223 TableScan [TS_3] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_108] (rows=143966864 width=119) + Select Operator [SEL_106] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_107] (rows=143966864 width=119) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_27_date_dim_d_date_sk_min) AND DynamicValue(RS_27_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_27_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=144002668 width=119) + Filter Operator [FIL_105] (rows=143966864 width=119) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=144002668 width=119) 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) + BROADCAST [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_98] - Group By Operator [GBY_96] (rows=1 width=12) + SHUFFLE [RS_96] + Group By Operator [GBY_94] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_94] (rows=317 width=4) + Select Operator [SEL_92] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_90] + Please refer to the previous Select Operator [SEL_88] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_38] PartitionCols:_col0, _col1 Select Operator [SEL_17] (rows=19832154 width=210) Output:["_col0","_col1","_col2"] @@ -195,30 +195,30 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_10] (rows=550076554 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_86] (rows=550076554 width=204) - Conds:RS_103._col0=RS_91._col0(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_84] (rows=550076554 width=204) + Conds:RS_101._col0=RS_89._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_91] + SHUFFLE [RS_89] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_90] + Please refer to the previous Select Operator [SEL_88] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_103] + SHUFFLE [RS_101] PartitionCols:_col0 - Select Operator [SEL_102] (rows=550076554 width=114) + Select Operator [SEL_100] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_101] (rows=550076554 width=114) + Filter Operator [FIL_99] (rows=550076554 width=114) predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_sales_price"] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_100] - Group By Operator [GBY_99] (rows=1 width=12) + BROADCAST [RS_98] + Group By Operator [GBY_97] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_97] - Group By Operator [GBY_95] (rows=1 width=12) + SHUFFLE [RS_95] + Group By Operator [GBY_93] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_92] (rows=317 width=4) + Select Operator [SEL_90] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_90] + Please refer to the previous Select Operator [SEL_88] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out index b2bde8d48c..404ed9cdfb 100644 --- 
ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out @@ -100,120 +100,120 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_174] - Limit [LIM_173] (rows=100 width=492) + File Output Operator [FS_173] + Limit [LIM_172] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_172] (rows=720 width=492) + Select Operator [SEL_171] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_171] - Select Operator [SEL_170] (rows=720 width=492) + SHUFFLE [RS_170] + Select Operator [SEL_169] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_169] (rows=720 width=304) + Top N Key Operator [TNK_168] (rows=720 width=304) keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100 - PTF Operator [PTF_168] (rows=720 width=304) + PTF Operator [PTF_167] (rows=720 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_167] (rows=720 width=304) + Select Operator [SEL_166] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] + SHUFFLE [RS_165] PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_165] (rows=720 width=304) + Select Operator [SEL_164] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_164] (rows=720 width=304) + Group By Operator [GBY_163] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_50] + SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_49] (rows=430560 width=304) + Group By Operator [GBY_48] (rows=430560 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L - Select Operator [SEL_47] (rows=525329897 width=290) + Select Operator [SEL_46] (rows=525329897 width=290) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_138] (rows=525329897 width=290) - Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_137] (rows=525329897 width=290) + Conds:RS_43._col1=RS_44._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] + SHUFFLE [RS_43] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_134] (rows=525329897 width=110) - Conds:RS_149._col0=RS_141._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_133] (rows=525329897 width=110) + Conds:RS_148._col0=RS_140._col0(Inner),Output:["_col1","_col2"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_140] (rows=317 width=8) + Select Operator [SEL_139] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_139] (rows=317 width=8) + Filter Operator [FIL_138] (rows=317 width=8) predicate:d_month_seq BETWEEN 1212 AND 1223 TableScan [TS_3] (rows=73049 width=8) 
default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_148] (rows=525329897 width=114) + Select Operator [SEL_147] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_147] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_42_d1_d_date_sk_min) AND DynamicValue(RS_42_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_42_d1_d_date_sk_bloom_filter))) + Filter Operator [FIL_146] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_41_d1_d_date_sk_min) AND DynamicValue(RS_41_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_41_d1_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_146] - Group By Operator [GBY_145] (rows=1 width=12) + BROADCAST [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_142] (rows=317 width=4) + Select Operator [SEL_141] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_140] + Please refer to the previous Select Operator [SEL_139] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_45] + SHUFFLE [RS_44] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_137] (rows=556 width=188) - Conds:RS_163._col2=RS_160._col0(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_136] (rows=556 width=188) + Conds:RS_162._col2=RS_159._col0(Inner),Output:["_col0","_col1","_col2"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] + SHUFFLE [RS_162] PartitionCols:_col2 - Select Operator [SEL_162] (rows=1704 width=188) + Select Operator [SEL_161] (rows=1704 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_161] (rows=1704 width=188) + Filter Operator [FIL_160] (rows=1704 width=188) predicate:s_state is not null TableScan [TS_6] (rows=1704 width=188) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_159] (rows=16 width=86) + Select Operator [SEL_158] (rows=16 width=86) Output:["_col0"] - Filter Operator [FIL_158] (rows=16 width=198) + Filter Operator [FIL_157] (rows=16 width=198) predicate:(rank_window_0 <= 5) - PTF Operator [PTF_157] (rows=49 width=198) + PTF Operator [PTF_156] (rows=49 width=198) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_156] (rows=49 width=198) + Select Operator [SEL_155] (rows=49 width=198) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_155] + SHUFFLE [RS_154] PartitionCols:_col0 - Top N Key 
Operator [TNK_154] (rows=49 width=198) + Top N Key Operator [TNK_153] (rows=49 width=198) PartitionCols:_col0,keys:_col0, _col1,top n:6 - Group By Operator [GBY_153] (rows=49 width=198) + Group By Operator [GBY_152] (rows=49 width=198) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 Group By Operator [GBY_25] (rows=19404 width=198) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_136] (rows=525329897 width=192) - Conds:RS_21._col1=RS_152._col0(Inner),Output:["_col2","_col5"] + Merge Join Operator [MERGEJOIN_135] (rows=525329897 width=192) + Conds:RS_21._col1=RS_151._col0(Inner),Output:["_col2","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_134] + Please refer to the previous Merge Join Operator [MERGEJOIN_133] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_151] PartitionCols:_col0 - Select Operator [SEL_151] (rows=1704 width=90) + Select Operator [SEL_150] (rows=1704 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=1704 width=90) + Filter Operator [FIL_149] (rows=1704 width=90) predicate:s_state is not null TableScan [TS_15] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] diff --git ql/src/test/results/clientpositive/perf/tez/query51.q.out ql/src/test/results/clientpositive/perf/tez/query51.q.out index 5dc11619a4..afc7909bb2 100644 --- ql/src/test/results/clientpositive/perf/tez/query51.q.out +++ ql/src/test/results/clientpositive/perf/tez/query51.q.out @@ -114,75 +114,75 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_119] - Limit [LIM_118] (rows=100 width=636) + File Output Operator [FS_117] + Limit [LIM_116] (rows=100 width=636) Number of rows:100 - Select Operator [SEL_117] (rows=2095597606 width=636) + Select Operator [SEL_115] (rows=2095597606 width=636) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] - Select Operator [SEL_115] (rows=2095597606 width=636) + SHUFFLE [RS_114] + Select Operator [SEL_113] (rows=2095597606 width=636) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_114] (rows=2095597606 width=420) + Top N Key Operator [TNK_112] (rows=2095597606 width=420) keys:CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END, CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END,top n:100 - Filter Operator [FIL_113] (rows=2095597606 width=420) + Filter Operator [FIL_111] (rows=2095597606 width=420) predicate:(max_window_0 > max_window_1) - PTF Operator [PTF_112] (rows=6286792818 width=420) + PTF Operator [PTF_110] (rows=6286792818 width=420) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END ASC NULLS LAST","partition by:":"CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END"}] - Select Operator [SEL_111] (rows=6286792818 width=420) + Select Operator [SEL_109] (rows=6286792818 width=420) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_41] PartitionCols:CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END - Merge Join Operator [MERGEJOIN_88] (rows=6286792818 width=420) - Conds:RS_40._col0, _col1=RS_41._col0, _col1(Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + 
Merge Join Operator [MERGEJOIN_86] (rows=6286792818 width=420) + Conds:RS_38._col0, _col1=RS_39._col0, _col1(Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_41] + SHUFFLE [RS_39] PartitionCols:_col0, _col1 - Select Operator [SEL_37] (rows=19832154 width=210) + Select Operator [SEL_36] (rows=19832154 width=210) Output:["_col0","_col1","_col2"] - PTF Operator [PTF_36] (rows=19832154 width=210) + PTF Operator [PTF_35] (rows=19832154 width=210) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}] - Group By Operator [GBY_32] (rows=19832154 width=210) + Group By Operator [GBY_31] (rows=19832154 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_31] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_30] (rows=143966864 width=210) + Group By Operator [GBY_29] (rows=143966864 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_87] (rows=143966864 width=209) - Conds:RS_109._col0=RS_93._col0(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_85] (rows=143966864 width=209) + Conds:RS_107._col0=RS_91._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_93] + SHUFFLE [RS_91] PartitionCols:_col0 - Select Operator [SEL_90] (rows=317 width=98) + Select Operator [SEL_88] (rows=317 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_89] (rows=317 width=102) + Filter Operator [FIL_87] (rows=317 width=102) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_108] (rows=143966864 width=119) + Select Operator [SEL_106] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_107] (rows=143966864 width=119) - predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_27_date_dim_d_date_sk_min) AND DynamicValue(RS_27_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_27_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=144002668 width=119) + Filter Operator [FIL_105] (rows=143966864 width=119) + predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) + BROADCAST [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_98] - Group By Operator [GBY_96] (rows=1 width=12) + SHUFFLE [RS_96] + Group By Operator [GBY_94] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_94] (rows=317 width=4) + Select Operator [SEL_92] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_90] + Please refer to the previous Select Operator [SEL_88] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_38] PartitionCols:_col0, _col1 Select Operator [SEL_17] (rows=19832154 width=210) Output:["_col0","_col1","_col2"] @@ -195,30 +195,30 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_10] (rows=550076554 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_86] (rows=550076554 width=204) - Conds:RS_103._col0=RS_91._col0(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_84] (rows=550076554 width=204) + Conds:RS_101._col0=RS_89._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_91] + SHUFFLE [RS_89] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_90] + Please refer to the previous Select Operator [SEL_88] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_103] + SHUFFLE [RS_101] PartitionCols:_col0 - Select Operator [SEL_102] (rows=550076554 width=114) + Select Operator [SEL_100] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_101] (rows=550076554 width=114) + Filter Operator [FIL_99] (rows=550076554 width=114) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_sales_price"] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_100] - Group By Operator [GBY_99] (rows=1 width=12) + BROADCAST [RS_98] + Group By Operator [GBY_97] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_97] - Group By Operator [GBY_95] (rows=1 width=12) + SHUFFLE [RS_95] + Group By Operator [GBY_93] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_92] (rows=317 width=4) + Select Operator [SEL_90] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_90] + Please refer to the previous Select Operator [SEL_88] diff --git ql/src/test/results/clientpositive/perf/tez/query70.q.out ql/src/test/results/clientpositive/perf/tez/query70.q.out index 6107ec0367..2c32e71a8a 100644 --- ql/src/test/results/clientpositive/perf/tez/query70.q.out +++ ql/src/test/results/clientpositive/perf/tez/query70.q.out @@ -100,120 +100,120 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_172] - Limit [LIM_171] (rows=100 width=492) + File Output Operator [FS_171] + Limit [LIM_170] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_170] (rows=720 width=492) + Select Operator [SEL_169] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_169] - Select Operator [SEL_168] (rows=720 width=492) + SHUFFLE 
[RS_168] + Select Operator [SEL_167] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_167] (rows=720 width=304) + Top N Key Operator [TNK_166] (rows=720 width=304) keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100 - PTF Operator [PTF_166] (rows=720 width=304) + PTF Operator [PTF_165] (rows=720 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_165] (rows=720 width=304) + Select Operator [SEL_164] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_163] PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_163] (rows=720 width=304) + Select Operator [SEL_162] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_162] (rows=720 width=304) + Group By Operator [GBY_161] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_49] + SHUFFLE [RS_48] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=135360 width=304) + Group By Operator [GBY_47] (rows=135360 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L - Select Operator [SEL_46] (rows=171536292 width=280) + Select Operator [SEL_45] (rows=171536292 width=280) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_136] (rows=171536292 width=280) - Conds:RS_43._col7=RS_161._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_135] (rows=171536292 width=280) + Conds:RS_42._col7=RS_160._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] + SHUFFLE [RS_160] PartitionCols:_col0 - Select Operator [SEL_160] (rows=16 width=86) + Select Operator [SEL_159] (rows=16 width=86) Output:["_col0"] - Filter Operator [FIL_159] (rows=16 width=198) + Filter Operator [FIL_158] (rows=16 width=198) predicate:(rank_window_0 <= 5) - PTF Operator [PTF_158] (rows=49 width=198) + PTF Operator [PTF_157] (rows=49 width=198) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_157] (rows=49 width=198) + Select Operator [SEL_156] (rows=49 width=198) Output:["_col0","_col1"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_155] PartitionCols:_col0 - Top N Key Operator [TNK_155] (rows=49 width=198) + Top N Key Operator [TNK_154] (rows=49 width=198) PartitionCols:_col0,keys:_col0, _col1,top n:6 - Group By Operator [GBY_154] (rows=49 width=198) + Group By Operator [GBY_153] (rows=49 width=198) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 Group By Operator [GBY_25] (rows=19404 width=198) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_135] (rows=525329897 width=192) - Conds:RS_21._col1=RS_153._col0(Inner),Output:["_col2","_col5"] + 
Merge Join Operator [MERGEJOIN_134] (rows=525329897 width=192) + Conds:RS_21._col1=RS_152._col0(Inner),Output:["_col2","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_132] (rows=525329897 width=110) - Conds:RS_147._col0=RS_139._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_131] (rows=525329897 width=110) + Conds:RS_146._col0=RS_138._col0(Inner),Output:["_col1","_col2"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_138] (rows=317 width=8) + Select Operator [SEL_137] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_137] (rows=317 width=8) + Filter Operator [FIL_136] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] + SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_146] (rows=525329897 width=114) + Select Operator [SEL_145] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_145] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) + Filter Operator [FIL_144] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_37_d1_d_date_sk_min) AND DynamicValue(RS_37_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_37_d1_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) + BROADCAST [RS_143] + Group By Operator [GBY_142] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_142] - Group By Operator [GBY_141] (rows=1 width=12) + SHUFFLE [RS_141] + Group By Operator [GBY_140] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_140] (rows=317 width=4) + Select Operator [SEL_139] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_138] + Please refer to the previous Select Operator [SEL_137] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_152] PartitionCols:_col0 - Select Operator [SEL_152] (rows=1704 width=90) + Select Operator [SEL_151] (rows=1704 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=1704 width=90) + Filter Operator [FIL_150] (rows=1704 width=90) predicate:(s_store_sk is not null and s_state is not null) TableScan [TS_15] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_42] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_133] (rows=525329897 width=290) - Conds:RS_40._col1=RS_150._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_132] 
(rows=525329897 width=290) + Conds:RS_39._col1=RS_149._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_39] PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_132] + Please refer to the previous Merge Join Operator [MERGEJOIN_131] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_149] (rows=1704 width=188) + Select Operator [SEL_148] (rows=1704 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_148] (rows=1704 width=188) + Filter Operator [FIL_147] (rows=1704 width=188) predicate:(s_state is not null and s_store_sk is not null) TableScan [TS_6] (rows=1704 width=188) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"] diff --git ql/src/test/results/clientpositive/ppd_join4.q.out ql/src/test/results/clientpositive/ppd_join4.q.out index 67d8e21133..5e4358864d 100644 --- ql/src/test/results/clientpositive/ppd_join4.q.out +++ ql/src/test/results/clientpositive/ppd_join4.q.out @@ -30,6 +30,7 @@ POSTHOOK: Input: default@dual POSTHOOK: Output: default@test_tbl POSTHOOK: Lineage: test_tbl.id SIMPLE [] POSTHOOK: Lineage: test_tbl.name SIMPLE [] +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select t2.* from @@ -64,13 +65,11 @@ STAGE PLANS: predicate: ((name = 'c') and (id = 'a')) (type: boolean) Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: 'a' (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: 'a' (type: string) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: t3 filterExpr: (id = 'a') (type: boolean) @@ -78,27 +77,27 @@ STAGE PLANS: Filter Operator predicate: (id = 'a') (type: boolean) Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: 'a' (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: 'a' (type: string) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 id (type: string) + 0 + 1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'a' (type: string), 'c' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -110,6 +109,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select t2.* from (select id,name from (select id,name from test_tbl) t1 sort by id) t2 diff --git ql/src/test/results/clientpositive/sort.q.out ql/src/test/results/clientpositive/sort.q.out index 42cbd83fdc..8c11f0f940 100644 --- ql/src/test/results/clientpositive/sort.q.out +++ ql/src/test/results/clientpositive/sort.q.out @@ -557,3 +557,562 @@ POSTHOOK: Input: default@src 97 val_97 98 val_98 98 val_98 +PREHOOK: query: EXPLAIN +SELECT x.* FROM SRC x SORT BY 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.* FROM SRC x SORT BY 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM SRC x SORT BY 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM SRC x SORT BY 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 
+167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 
+409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/auto_join0.q.out ql/src/test/results/clientpositive/spark/auto_join0.q.out index 300c087686..4e59695bff 100644 --- ql/src/test/results/clientpositive/spark/auto_join0.q.out +++ ql/src/test/results/clientpositive/spark/auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[24][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -41,10 +41,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -81,10 +81,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -115,17 +115,21 @@ STAGE PLANS: input vertices: 1 Reducer 5 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized Reduce Operator Tree: @@ -148,7 +152,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[24][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git ql/src/test/results/clientpositive/spark/auto_join15.q.out ql/src/test/results/clientpositive/spark/auto_join15.q.out index 096d9857a0..6ee172a825 100644 --- ql/src/test/results/clientpositive/spark/auto_join15.q.out +++ ql/src/test/results/clientpositive/spark/auto_join15.q.out @@ -37,10 +37,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -60,31 +64,35 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/auto_join20.q.out ql/src/test/results/clientpositive/spark/auto_join20.q.out index 1b5150d419..a6e53a5164 100644 --- ql/src/test/results/clientpositive/spark/auto_join20.q.out +++ ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -32,20 +32,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - 
predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -53,20 +57,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -82,38 +90,42 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join 0 to 2 + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -192,20 +204,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: ((key < 15) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -213,20 +229,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key < 15)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -242,38 +262,42 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) 
(type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join 0 to 2 + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/auto_join21.q.out ql/src/test/results/clientpositive/spark/auto_join21.q.out index 6cd15fd348..4f6e9ddd66 100644 --- ql/src/test/results/clientpositive/spark/auto_join21.q.out +++ ql/src/test/results/clientpositive/spark/auto_join21.q.out @@ -23,15 +23,22 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -39,20 +46,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) 
Execution mode: vectorized Local Work: Map Reduce Local Work @@ -68,26 +78,26 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join 1 to 2 + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join23.q.out ql/src/test/results/clientpositive/spark/auto_join23.q.out index 268eb76741..401928f95c 100644 --- ql/src/test/results/clientpositive/spark/auto_join23.q.out +++ ql/src/test/results/clientpositive/spark/auto_join23.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY @@ -23,15 +23,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -46,24 +50,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) @@ -94,7 +98,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/spark/auto_join28.q.out ql/src/test/results/clientpositive/spark/auto_join28.q.out index fc55988949..20734e6ba2 100644 --- ql/src/test/results/clientpositive/spark/auto_join28.q.out +++ ql/src/test/results/clientpositive/spark/auto_join28.q.out @@ -23,15 +23,22 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -39,20 +46,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -68,26 +78,26 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join 1 to 2 + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) @@ -142,20 +152,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) + filterExpr: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 10) (type: boolean) + predicate: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -163,20 +177,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -192,26 +210,26 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 1 to 2 - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Left Outer Join 1 to 2 + filter 
predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 3 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) @@ -266,20 +284,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -287,20 +309,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -316,26 +342,26 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Left Outer Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Left Outer Join 1 
to 2 + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) @@ -377,50 +403,81 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) > 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -436,32 +493,28 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + input vertices: + 0 Map 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -471,10 +524,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_join29.q.out ql/src/test/results/clientpositive/spark/auto_join29.q.out index 4bb41d7f10..afa4f03d11 100644 --- ql/src/test/results/clientpositive/spark/auto_join29.q.out +++ ql/src/test/results/clientpositive/spark/auto_join29.q.out @@ -23,15 +23,22 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 
{_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -39,20 +46,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -68,26 +78,26 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join 1 to 2 + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) @@ -650,20 +660,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) + filterExpr: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 10) (type: boolean) + predicate: (UDFToDouble(key) > 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: 
+ 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -671,20 +685,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -700,26 +718,26 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 1 to 2 - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Left Outer Join 1 to 2 + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 3 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) @@ -1282,20 +1300,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1303,20 +1325,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1332,26 +1358,26 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Left Outer Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Left Outer Join 1 to 2 + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) @@ -1913,50 +1939,81 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: 
(UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) > 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1972,32 +2029,28 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + input vertices: + 0 Map 1 
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2007,10 +2060,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2566,16 +2619,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2583,16 +2639,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2607,33 +2667,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join 1 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2643,10 +2702,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2690,20 +2749,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 
0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2711,20 +2773,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2740,26 +2808,26 @@ STAGE PLANS: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join 1 to 2 + filter predicates: + 0 + 1 + 2 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) @@ -3318,45 +3386,46 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key 
(type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(_col0) < 10.0D) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3364,53 +3433,53 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Inner Join 1 to 2 - filter predicates: - 0 {(key < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 
_col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 1 Map 2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 2 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3430,6 +3499,7 @@ POSTHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY @@ -3441,12 +3511,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -3454,41 +3525,59 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 Execution mode: vectorized Local Work: Map Reduce Local Work - Map 4 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src3 - filterExpr: (key < 10) 
(type: boolean) + alias: src2 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3496,57 +3585,50 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan - alias: src2 - filterExpr: (key < 10) (type: boolean) + alias: src3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 4 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + input vertices: + 0 Map 2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: 
string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3558,6 +3640,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -3608,37 +3691,46 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(_col0) < 10.0D) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3646,53 +3738,53 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan 
- alias: src1 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + alias: src3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + input vertices: + 0 Map 1 + 1 Map 2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) null sort order: zzzzzz sort order: ++++++ - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 2 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_join30.q.out ql/src/test/results/clientpositive/spark/auto_join30.q.out index 0d3c6df069..a86d407497 100644 --- ql/src/test/results/clientpositive/spark/auto_join30.q.out +++ ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -74,21 +74,25 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By 
Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -167,15 +171,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -201,21 +209,25 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -294,15 +306,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -328,21 +344,25 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -433,8 +453,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -454,8 +474,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -498,17 +518,21 @@ STAGE PLANS: 1 Map 3 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -599,16 +623,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - 
outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -616,16 +644,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -640,35 +672,43 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -759,16 +799,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -776,16 +820,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -813,22 +861,26 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 1 Map 3 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -919,16 +971,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + 
predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -936,16 +992,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -973,22 +1033,26 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1079,16 +1143,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: 
vectorized Local Work: Map Reduce Local Work @@ -1096,16 +1164,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1127,28 +1199,32 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Right Outer Join 0 to 1 + Inner Join 0 to 1 Right Outer Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/auto_join31.q.out ql/src/test/results/clientpositive/spark/auto_join31.q.out index a20b6abb12..582634ec9c 100644 --- ql/src/test/results/clientpositive/spark/auto_join31.q.out +++ ql/src/test/results/clientpositive/spark/auto_join31.q.out @@ -34,20 +34,24 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution 
mode: vectorized Local Work: Map Reduce Local Work @@ -55,16 +59,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -72,46 +80,54 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Map 1 - 2 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out 
ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index bef3125869..8fa89550ce 100644 --- ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -40,56 +40,68 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 5 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Inner Join 0 to 1 Inner Join 0 to 2 keys: 0 _col0 (type: string) @@ -97,20 +109,24 @@ STAGE PLANS: 2 
_col0 (type: string) outputColumnNames: _col2, _col3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - keys: _col2 (type: string) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: _col2 (type: string), hash(_col2,_col3) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out index 11dcc6d860..8e9d3b493f 100644 --- ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out +++ ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out @@ -1,3 +1,4 @@ +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select t2.* from @@ -43,8 +44,8 @@ STAGE PLANS: Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 '105' (type: string) - 1 '105' (type: string) + 0 + 1 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -68,18 +69,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 '105' (type: string) - 1 '105' (type: string) + 0 + 1 input vertices: 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31250 Data size: 695250 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '105' (type: string), 'val_105' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31250 Data size: 695250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31250 Data size: 695250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -94,6 +95,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: select t2.* from (select key,value from (select key,value from src) t1 sort by key) t2 diff --git ql/src/test/results/clientpositive/spark/join0.q.out ql/src/test/results/clientpositive/spark/join0.q.out index ed11931ba1..95e458482d 100644 --- ql/src/test/results/clientpositive/spark/join0.q.out +++ ql/src/test/results/clientpositive/spark/join0.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -35,10 +35,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -53,10 +53,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -102,7 +102,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN FORMATTED SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -123,7 +123,7 @@ SELECT src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM (SELECT * FROM src WHERE src.key < 10) src1 diff --git ql/src/test/results/clientpositive/spark/join15.q.out ql/src/test/results/clientpositive/spark/join15.q.out index e586f33a74..cff5bf52d1 100644 --- ql/src/test/results/clientpositive/spark/join15.q.out +++ ql/src/test/results/clientpositive/spark/join15.q.out @@ -29,13 +29,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 4 Map Operator Tree: @@ -46,13 +50,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -60,19 +68,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + null sort order: zzzz + sort order: ++++ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/join20.q.out ql/src/test/results/clientpositive/spark/join20.q.out index 481eef2d40..15d531a1d4 100644 --- ql/src/test/results/clientpositive/spark/join20.q.out +++ ql/src/test/results/clientpositive/spark/join20.q.out @@ -26,48 +26,60 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col1 (type: string) Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -78,22 +90,18 @@ STAGE PLANS: filter predicates: 0 1 - 2 {(KEY.reducesinkkey0 < 20)} + 2 {VALUE._col1} keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: @@ -701,48 +709,60 @@ STAGE PLANS: Map Operator Tree: TableScan alias: 
src1 - filterExpr: ((key < 15) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key < 15)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -753,22 +773,18 @@ STAGE 
PLANS: filter predicates: 0 1 - 2 {(KEY.reducesinkkey0 < 20)} + 2 {VALUE._col1} keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/join21.q.out ql/src/test/results/clientpositive/spark/join21.q.out index 4c117cd4b5..2f517b3020 100644 --- ql/src/test/results/clientpositive/spark/join21.q.out +++ ql/src/test/results/clientpositive/spark/join21.q.out @@ -25,69 +25,79 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: src2 - filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 Right Outer Join 1 to 2 filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} + 0 1 - 2 {(KEY.reducesinkkey0 < 10)} + 2 {VALUE._col1} keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/join23.q.out ql/src/test/results/clientpositive/spark/join23.q.out index 6823bdb935..ac291c2708 100644 --- ql/src/test/results/clientpositive/spark/join23.q.out +++ ql/src/test/results/clientpositive/spark/join23.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY @@ -25,31 +25,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + 
predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -59,17 +67,13 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + null sort order: zzzz + sort order: ++++ Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: @@ -91,7 +95,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/spark/join40.q.out ql/src/test/results/clientpositive/spark/join40.q.out index 3efa147330..9642c20671 100644 --- ql/src/test/results/clientpositive/spark/join40.q.out +++ ql/src/test/results/clientpositive/spark/join40.q.out @@ -1813,48 +1813,60 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -1865,22 +1877,18 @@ STAGE PLANS: filter predicates: 0 1 - 2 {(KEY.reducesinkkey0 < 20)} + 2 {VALUE._col1} keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: 
string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: @@ -2488,48 +2496,60 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: ((key < 15) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key < 15)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan alias: src3 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -2540,22 +2560,18 @@ STAGE PLANS: filter predicates: 0 1 - 2 {(KEY.reducesinkkey0 < 20)} + 2 {VALUE._col1} keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out index 54f08850de..a7325df6d1 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out @@ -55,104 +55,103 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src1 - filterExpr: ((key < 300) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 300) and (key < 10)) (type: boolean) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Spark 
HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 300) (type: boolean) + filterExpr: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 300) (type: boolean) + predicate: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 3 + Map 5 Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 300) (type: boolean) + filterExpr: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 300) (type: boolean) + predicate: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - null sort order: zzz - sort order: +++ - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Select Operator + 
expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 4 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 1 to 2 + filter predicates: + 0 + 1 {(UDFToDouble(KEY.reducesinkkey0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + null sort order: zzz + sort order: +++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -243,20 +242,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: ((key < 300) and (key < 10)) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 300) and (key < 10)) (type: boolean) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -264,20 +267,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 - filterExpr: (key < 300) (type: boolean) + filterExpr: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 300) (type: boolean) + predicate: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -292,31 +299,31 
@@ STAGE PLANS: Map Operator Tree: TableScan alias: src3 - filterExpr: (key < 300) (type: boolean) + filterExpr: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 300) (type: boolean) + predicate: (UDFToDouble(key) < 300.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 1 to 2 + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 1 Map 2 Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) diff --git ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out index 7850255f2c..d2d0473e4c 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out @@ -248,79 +248,98 @@ POSTHOOK: Input: default@dest_2_n0 POSTHOOK: Input: default@src1 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src1 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 2 + Map 4 Map 
Operator Tree: TableScan alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 3 + Map 5 Map Operator Tree: TableScan alias: src3 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 4 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: vectorized 
Reduce Operator Tree: Select Operator @@ -1104,12 +1123,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1117,12 +1144,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src3 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1137,29 +1172,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 4 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + 2 Map 4 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: 
string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/parallel_join0.q.out ql/src/test/results/clientpositive/spark/parallel_join0.q.out index 3acb2ec941..77a7819f68 100644 --- ql/src/test/results/clientpositive/spark/parallel_join0.q.out +++ ql/src/test/results/clientpositive/spark/parallel_join0.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -35,10 +35,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -53,10 +53,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (key < 10) (type: boolean) + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -102,7 +102,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN FORMATTED SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -123,7 +123,7 @@ SELECT src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM (SELECT * FROM src WHERE src.key < 10) src1 diff --git ql/src/test/results/clientpositive/spark/ppd_join4.q.out ql/src/test/results/clientpositive/spark/ppd_join4.q.out index bde0232ac6..42de88d393 100644 --- ql/src/test/results/clientpositive/spark/ppd_join4.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join4.q.out @@ -30,6 +30,7 @@ POSTHOOK: Input: default@dual POSTHOOK: Output: default@test_tbl POSTHOOK: Lineage: test_tbl.id SIMPLE [] POSTHOOK: Lineage: test_tbl.name SIMPLE [] +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select t2.* from @@ -56,7 +57,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A 
masked pattern was here #### Vertices: Map 1 @@ -71,10 +72,8 @@ STAGE PLANS: Select Operator Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: 'a' (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: 'a' (type: string) + null sort order: + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized Map 3 @@ -86,12 +85,12 @@ STAGE PLANS: Filter Operator predicate: (id = 'a') (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: 'a' (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: 'a' (type: string) + Select Operator Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -99,16 +98,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 id (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + 0 + 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 'a' (type: string), 'c' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -120,6 +119,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select t2.* from (select id,name from (select id,name from test_tbl) t1 sort by id) t2 diff --git ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out index 691d0933ab..31243284c1 100644 --- ql/src/test/results/clientpositive/spark/semijoin.q.out +++ ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -151,13 +151,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan @@ -189,7 +193,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 86 
Basic stats: COMPLETE Column stats: NONE @@ -265,13 +269,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan @@ -303,7 +311,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE @@ -381,13 +389,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan @@ -419,7 +431,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 86 Basic stats: PARTIAL Column stats: NONE @@ -484,18 +496,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: key is not null (type: boolean) + filterExpr: (key < 15) (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (key < 15) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), true (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), true (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 4 Map Operator Tree: 
TableScan @@ -506,20 +522,20 @@ STAGE PLANS: predicate: (key < 15) (type: boolean) Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col1 + expressions: key (type: int), key is not null (type: boolean) + outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col1 (type: int), _col1 (type: int) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -527,28 +543,28 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 _col1 (type: int) + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -612,13 +628,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), true (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), true (type: boolean) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan @@ -629,20 +649,20 @@ STAGE 
PLANS: predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -650,8 +670,8 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -715,6 +735,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: t3_n12 @@ -739,50 +779,34 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12 Data 
size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -830,6 +854,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), true (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), true (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: t2_n33 @@ -839,65 +883,49 @@ STAGE PLANS: predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), value is not null (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int), _col1 (type: boolean) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 
Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -942,6 +970,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key > 2) (type: boolean) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: t1_n55 @@ -966,46 +1014,30 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1043,16 +1075,37 @@ POSTHOOK: Input: default@t1_n55 POSTHOOK: Input: default@t3_n12 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -1071,53 +1124,35 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 
(type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1190,24 +1225,28 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: value (type: string), key (type: int) + outputColumnNames: _col1, _col2 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan alias: b - filterExpr: (2 * key) is not null (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (2 * key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: (2 * key) (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1217,10 +1256,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (2 * _col0) (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -1228,8 +1267,8 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1304,13 +1343,17 @@ STAGE PLANS: Filter Operator predicate: key is not 
null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan @@ -1320,13 +1363,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -1359,21 +1406,17 @@ STAGE PLANS: Inner Join 0 to 1 Left Semi Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + value expressions: _col2 (type: int), _col3 (type: string) Reducer 3 Reduce Operator Tree: Select Operator @@ -1451,12 +1494,16 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 22 Data size: 
163 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -1488,7 +1535,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int), value (type: string) + 0 _col0 (type: int), _col1 (type: string) 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE @@ -1554,16 +1601,37 @@ POSTHOOK: Input: default@t2_n33 POSTHOOK: Input: default@t3_n12 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -1582,14 +1650,13 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: c @@ -1608,50 +1675,30 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map 
Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + Left Semi Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -1725,54 +1772,74 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 Left Semi Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE @@ -1867,45 +1934,65 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1913,8 +2000,8 @@ 
STAGE PLANS: Right Outer Join 0 to 1 Left Semi Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE @@ -2012,54 +2099,74 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Full Outer Join 0 to 1 + Right Outer Join 0 to 1 Left Semi Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: 
NONE @@ -2157,45 +2264,65 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2203,9 +2330,9 @@ STAGE PLANS: Left Semi Join 0 to 1 Left Outer Join 0 to 2 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - 2 key (type: int) + 2 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -2302,45 +2429,61 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key 
(type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2348,9 +2491,9 @@ STAGE PLANS: Left Semi Join 0 to 1 Right Outer Join 0 to 2 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - 2 key (type: int) + 2 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -2449,45 +2592,61 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2495,9 +2654,9 @@ STAGE PLANS: Left Semi Join 0 to 1 Full Outer Join 0 to 2 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - 2 key (type: int) + 2 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -2613,13 +2772,17 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -2649,20 +2812,28 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - null sort 
order: z - sort order: + - Map-reduce partition columns: value (type: string) + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 key (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE @@ -2680,7 +2851,7 @@ STAGE PLANS: Left Outer Join 0 to 1 keys: 0 _col1 (type: string) - 1 value (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 196 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/spark/sort.q.out ql/src/test/results/clientpositive/spark/sort.q.out index f0c5bd2983..3de5a9d9d8 100644 --- ql/src/test/results/clientpositive/spark/sort.q.out +++ ql/src/test/results/clientpositive/spark/sort.q.out @@ -564,3 +564,569 @@ POSTHOOK: Input: default@src 97 val_97 98 val_98 98 val_98 +PREHOOK: query: EXPLAIN +SELECT x.* FROM SRC x SORT BY 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.* FROM SRC x SORT BY 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 2 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM SRC x SORT BY 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM SRC x SORT BY 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A 
masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 
val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index 563a6a684f..2c548d608a 100644 --- ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -262,7 +262,7 @@ select src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### -Plan not optimized by CBO because the statement has sort by +Plan optimized by CBO. 
Vertex dependency in Stage-1 Reducer 2 <- Map 1 (GROUP) @@ -278,15 +278,17 @@ Stage-0 <-Map 1 [GROUP] GROUP [RS_12] Group By Operator [GBY_11] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(hash(_col0,_col1,_col2,_col3))"] - Map Join Operator [MAPJOIN_18] (rows=27556 width=22) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Select Operator [SEL_2] (rows=166 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=166 width=10) - predicate:(key < 10) - TableScan [TS_0] (rows=500 width=10) - default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Output:["_col0"],aggregations:["sum(_col0)"] + Select Operator [SEL_9] (rows=27556 width=22) + Output:["_col0"] + Map Join Operator [MAPJOIN_18] (rows=27556 width=22) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Select Operator [SEL_2] (rows=166 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_16] (rows=166 width=10) + predicate:(UDFToDouble(key) < 10.0D) + TableScan [TS_0] (rows=500 width=10) + default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] Map Reduce Local Work Stage-2 Map 3 @@ -294,7 +296,7 @@ Stage-0 Select Operator [SEL_5] (rows=166 width=10) Output:["_col0","_col1"] Filter Operator [FIL_17] (rows=166 width=10) - predicate:(key < 10) + predicate:(UDFToDouble(key) < 10.0D) TableScan [TS_3] (rows=500 width=10) default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] Map Reduce Local Work @@ -3608,7 +3610,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_CTAS1_n0 -Plan not optimized by CBO because the statement has sort by +Plan not optimized by CBO because the statement has sort by with limit Vertex dependency in root stage Reducer 2 <- Map 1 (PARTITION-LEVEL SORT) @@ -3663,7 +3665,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_ctas3_n0 -Plan not optimized by CBO because the statement has sort by +Plan not optimized by CBO because the statement has sort by with limit Vertex dependency in root stage Reducer 2 <- Map 1 (PARTITION-LEVEL SORT) @@ -3756,7 +3758,7 @@ select src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### -Plan not optimized by CBO because the statement has sort by +Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Map 4 (PARTITION-LEVEL SORT) @@ -3779,7 +3781,7 @@ Stage-0 Select Operator [SEL_2] (rows=166 width=178) Output:["_col0","_col1"] Filter Operator [FIL_13] (rows=166 width=178) - predicate:(key < 10) + predicate:(UDFToDouble(key) < 10.0D) TableScan [TS_0] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 4 [PARTITION-LEVEL SORT] @@ -3787,7 +3789,7 @@ Stage-0 Select Operator [SEL_5] (rows=166 width=178) Output:["_col0","_col1"] Filter Operator [FIL_14] (rows=166 width=178) - predicate:(key < 10) + predicate:(UDFToDouble(key) < 10.0D) TableScan [TS_3] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git ql/src/test/results/clientpositive/spark/union_ppr.q.out ql/src/test/results/clientpositive/spark/union_ppr.q.out index 14a63b7e95..e3f926a526 100644 --- ql/src/test/results/clientpositive/spark/union_ppr.q.out +++ ql/src/test/results/clientpositive/spark/union_ppr.q.out @@ -39,25 +39,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: ((key < 100) and (ds = '2008-04-08')) (type: boolean) + filterExpr: ((UDFToDouble(key) < 100.0D) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 666 Data size: 7074 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ + key expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + null sort order: zzz + sort order: +++ Statistics: Num rows: 666 Data size: 7074 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false @@ -164,31 +164,31 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a:__u1-subquery1:x] - /srcpart/ds=2008-04-08/hr=12 [a:__u1-subquery1:x] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0-subquery1:x] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0-subquery1:x] Map 3 Map Operator Tree: TableScan alias: y - filterExpr: ((key < 100) and (ds = '2008-04-08')) (type: boolean) + filterExpr: ((UDFToDouble(key) < 100.0D) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, 
_col2 Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 666 Data size: 7074 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ + key expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + null sort order: zzz + sort order: +++ Statistics: Num rows: 666 Data size: 7074 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false @@ -295,14 +295,14 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a:__u1-subquery2:y] - /srcpart/ds=2008-04-08/hr=12 [a:__u1-subquery2:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0-subquery2:y] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0-subquery2:y] Reducer 2 Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey3 (type: string) + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7074 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out index 02b4f6bf32..6b294a2774 100644 --- ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out +++ ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out @@ -601,33 +601,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_orc_bucketed_n0 - filterExpr: ((b = 4294967363L) and (t < 100Y)) (type: boolean) + filterExpr: ((t < 100Y) and (b = 4294967363L)) (type: boolean) Statistics: Num rows: 2098 Data size: 706986 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((b = 4294967363L) and (t < 100Y)) (type: boolean) - Statistics: Num rows: 6 Data size: 2022 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((t < 100Y) and (b = 4294967363L)) (type: boolean) + Statistics: Num rows: 7 Data size: 2359 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6 Data size: 2502 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2919 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 6 Data size: 2502 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2919 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: smallint), 
_col5 (type: float), _col6 (type: double), _col7 (type: boolean), _col8 (type: string), _col9 (type: timestamp), _col10 (type: decimal(4,2)), _col11 (type: binary) Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363L (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary) + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363L (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: double), VALUE._col4 (type: boolean), VALUE._col5 (type: string), VALUE._col6 (type: timestamp), VALUE._col7 (type: decimal(4,2)), VALUE._col8 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6 Data size: 2502 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2919 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 2502 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2919 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git ql/src/test/results/clientpositive/union_ppr.q.out ql/src/test/results/clientpositive/union_ppr.q.out index 89985d0b1a..29250d2ddf 100644 --- ql/src/test/results/clientpositive/union_ppr.q.out +++ ql/src/test/results/clientpositive/union_ppr.q.out @@ -34,54 +34,54 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: ((key < 100) and (ds = '2008-04-08')) (type: boolean) + filterExpr: ((UDFToDouble(key) < 100.0D) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) Statistics: Num rows: 333 Data size: 120546 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 333 Data size: 120546 Basic stats: COMPLETE Column stats: COMPLETE Union Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + null sort order: zzz + sort 
order: +++ + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false TableScan alias: y - filterExpr: ((key < 100) and (ds = '2008-04-08')) (type: boolean) + filterExpr: ((UDFToDouble(key) < 100.0D) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) Statistics: Num rows: 333 Data size: 120546 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 333 Data size: 120546 Basic stats: COMPLETE Column stats: COMPLETE Union Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + null sort order: zzz + sort order: +++ + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Path -> Alias: @@ -186,12 +186,12 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a:__u1-subquery1:x, a:__u1-subquery2:y] - /srcpart/ds=2008-04-08/hr=12 [a:__u1-subquery1:x, a:__u1-subquery2:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0-subquery1:x, $hdt$_0-subquery2:y] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0-subquery1:x, $hdt$_0-subquery2:y] Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey3 (type: string) + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator