diff --git a/pom.xml b/pom.xml index 802d3d4..6737649 100644 --- a/pom.xml +++ b/pom.xml @@ -107,7 +107,7 @@ 3.4 1.7.7 0.8.0.RELEASE - 1.5.0 + 1.6.0 4.2.1 4.1.6 4.1.7 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 1c15012..58a7cff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -47,6 +47,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexRangeRef; +import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitor; import org.apache.calcite.rex.RexVisitorImpl; @@ -666,7 +667,7 @@ public String apply(RexNode r) { // Note: this is the last step, trying to avoid the expensive call to the metadata provider // if possible Set predicatesInSubtree = Sets.newHashSet(); - for (RexNode pred : RelMetadataQuery.getPulledUpPredicates(inp).pulledUpPredicates) { + for (RexNode pred : RelMetadataQuery.instance().getPulledUpPredicates(inp).pulledUpPredicates) { predicatesInSubtree.add(pred.toString()); predicatesInSubtree.addAll(Lists.transform(RelOptUtil.conjunctions(pred), REX_STR_FN)); } @@ -935,6 +936,12 @@ public Boolean visitFieldAccess(RexFieldAccess fieldAccess) { // ".FIELD" is constant iff "" is constant. return fieldAccess.getReferenceExpr().accept(this); } + + @Override + public Boolean visitSubQuery(RexSubQuery subQuery) { + // it seems that it is not used by anything. + return false; + } } public static Set getInputRefs(RexNode expr) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java index eeec44e..83205bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java @@ -28,6 +28,7 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.RelFactories.AggregateFactory; @@ -147,6 +148,14 @@ public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRe Set variablesStopped, boolean semiJoinDone) { return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType, false); } + + @Override + public RelNode createJoin(RelNode left, RelNode right, RexNode condition, + Set variablesSet, JoinRelType joinType, boolean semiJoinDone) { + // According to calcite, it is going to be removed before Calcite-2.0 + // TODO: to handle CorrelationId + return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType, semiJoinDone); + } } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java index 6840418..8c00322 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java @@ -200,7 +200,7 @@ public double computeSMBMapJoinIOCost( } public static boolean isFittingIntoMemory(Double maxSize, RelNode input, int buckets) { - Double currentMemory = RelMetadataQuery.cumulativeMemoryWithinPhase(input); + Double currentMemory = RelMetadataQuery.instance().cumulativeMemoryWithinPhase(input); if (currentMemory != null) { if(currentMemory / buckets > maxSize) { return false; @@ -314,8 +314,8 @@ public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation strea if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) { // Left side - final Double leftAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); - final Double leftRowCount = RelMetadataQuery.getRowCount(join.getLeft()); + final Double leftAvgRowSize = RelMetadataQuery.instance().getAverageRowSize(join.getLeft()); + final Double leftRowCount = RelMetadataQuery.instance().getRowCount(join.getLeft()); if (leftAvgRowSize == null || leftRowCount == null) { return null; } @@ -324,8 +324,8 @@ public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation strea if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.LEFT_RELATION) { // Right side - final Double rightAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getRight()); - final Double rightRowCount = RelMetadataQuery.getRowCount(join.getRight()); + final Double rightAvgRowSize = RelMetadataQuery.instance().getAverageRowSize(join.getRight()); + final Double rightRowCount = RelMetadataQuery.instance().getRowCount(join.getRight()); if (rightAvgRowSize == null || rightRowCount == null) { return null; } @@ -338,8 +338,8 @@ public static Integer getSplitCountWithRepartition(HiveJoin join) { final Double maxSplitSize = join.getCluster().getPlanner().getContext(). unwrap(HiveAlgorithmsConf.class).getMaxSplitSize(); // We repartition: new number of splits - final Double averageRowSize = RelMetadataQuery.getAverageRowSize(join); - final Double rowCount = RelMetadataQuery.getRowCount(join); + final Double averageRowSize = RelMetadataQuery.instance().getAverageRowSize(join); + final Double rowCount = RelMetadataQuery.instance().getRowCount(join); if (averageRowSize == null || rowCount == null) { return null; } @@ -357,7 +357,7 @@ public static Integer getSplitCountWithoutRepartition(HiveJoin join) { } else { return null; } - return RelMetadataQuery.splitCount(largeInput); + return RelMetadataQuery.instance().splitCount(largeInput); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java index 6669d32..f1037e0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java @@ -84,8 +84,8 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { - double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); - double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + double leftRCount = RelMetadataQuery.instance().getRowCount(join.getLeft()); + double rightRCount = RelMetadataQuery.instance().getRowCount(join.getRight()); return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java index 61a3a64..c0086f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java @@ -78,7 +78,7 @@ public RelOptCost getDefaultCost() { @Override public RelOptCost getScanCost(HiveTableScan ts) { - return algoUtils.computeScanCost(ts.getRows(), RelMetadataQuery.getAverageRowSize(ts)); + return algoUtils.computeScanCost(ts.getRows(), RelMetadataQuery.instance().getAverageRowSize(ts)); } @Override @@ -87,7 +87,7 @@ public RelOptCost getAggregateCost(HiveAggregate aggregate) { return HiveCost.FACTORY.makeZeroCost(); } else { // 1. Sum of input cardinalities - final Double rCount = RelMetadataQuery.getRowCount(aggregate.getInput()); + final Double rCount = RelMetadataQuery.instance().getRowCount(aggregate.getInput()); if (rCount == null) { return null; } @@ -96,7 +96,7 @@ public RelOptCost getAggregateCost(HiveAggregate aggregate) { // 3. IO cost = cost of writing intermediary results to local FS + // cost of reading from local FS for transferring to GBy + // cost of transferring map outputs to GBy operator - final Double rAverageSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput()); + final Double rAverageSize = RelMetadataQuery.instance().getAverageRowSize(aggregate.getInput()); if (rAverageSize == null) { return null; } @@ -129,8 +129,8 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { // 1. Sum of input cardinalities - final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); - final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + final Double leftRCount = RelMetadataQuery.instance().getRowCount(join.getLeft()); + final Double rightRCount = RelMetadataQuery.instance().getRowCount(join.getRight()); if (leftRCount == null || rightRCount == null) { return null; } @@ -151,8 +151,8 @@ public RelOptCost getCost(HiveJoin join) { // 3. IO cost = cost of writing intermediary results to local FS + // cost of reading from local FS for transferring to join + // cost of transferring map outputs to Join operator - final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); - final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + final Double leftRAverageSize = RelMetadataQuery.instance().getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.instance().getAverageRowSize(join.getRight()); if (leftRAverageSize == null || rightRAverageSize == null) { return null; } @@ -187,8 +187,8 @@ public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { join.setJoinAlgorithm(TezCommonJoinAlgorithm.INSTANCE); final Double memoryWithinPhase = - RelMetadataQuery.cumulativeMemoryWithinPhase(join); - final Integer splitCount = RelMetadataQuery.splitCount(join); + RelMetadataQuery.instance().cumulativeMemoryWithinPhase(join); + final Integer splitCount = RelMetadataQuery.instance().splitCount(join); join.setJoinAlgorithm(oldAlgo); if (memoryWithinPhase == null || splitCount == null) { @@ -239,8 +239,8 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { // 1. Sum of input cardinalities - final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); - final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + final Double leftRCount = RelMetadataQuery.instance().getRowCount(join.getLeft()); + final Double rightRCount = RelMetadataQuery.instance().getRowCount(join.getRight()); if (leftRCount == null || rightRCount == null) { return null; } @@ -251,7 +251,7 @@ public RelOptCost getCost(HiveJoin join) { add(leftRCount). add(rightRCount). build(); - ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder(); + ImmutableBitSet.Builder streamingBuilder = ImmutableBitSet.builder(); switch (join.getStreamingSide()) { case LEFT_RELATION: streamingBuilder.set(0); @@ -266,8 +266,8 @@ public RelOptCost getCost(HiveJoin join) { final double cpuCost = HiveAlgorithmsUtil.computeMapJoinCPUCost(cardinalities, streaming); // 3. IO cost = cost of transferring small tables to join node * // degree of parallelism - final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); - final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + final Double leftRAverageSize = RelMetadataQuery.instance().getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.instance().getAverageRowSize(join.getRight()); if (leftRAverageSize == null || rightRAverageSize == null) { return null; } @@ -277,8 +277,8 @@ public RelOptCost getCost(HiveJoin join) { build(); JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezMapJoinAlgorithm.INSTANCE); - final int parallelism = RelMetadataQuery.splitCount(join) == null - ? 1 : RelMetadataQuery.splitCount(join); + final int parallelism = RelMetadataQuery.instance().splitCount(join) == null + ? 1 : RelMetadataQuery.instance().splitCount(join); join.setJoinAlgorithm(oldAlgo); final double ioCost = algoUtils.computeMapJoinIOCost(relationInfos, streaming, parallelism); // 4. Result @@ -322,7 +322,7 @@ public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { return null; } // If simple map join, the whole relation goes in memory - return RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput); + return RelMetadataQuery.instance().cumulativeMemoryWithinPhase(inMemoryInput); } @Override @@ -376,7 +376,7 @@ public boolean isExecutable(HiveJoin join) { // What we need is a way to get buckets not splits JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezBucketJoinAlgorithm.INSTANCE); - Integer buckets = RelMetadataQuery.splitCount(smallInput); + Integer buckets = RelMetadataQuery.instance().splitCount(smallInput); join.setJoinAlgorithm(oldAlgo); if (buckets == null) { @@ -388,7 +388,7 @@ public boolean isExecutable(HiveJoin join) { for (int i=0; i joinKeysInChildren = new ArrayList(); @@ -203,7 +203,7 @@ public ImmutableBitSet getSortedInputs() throws CalciteSemanticException { for (int i=0; i exps, Rel } @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return RelMetadataQuery.getNonCumulativeCost(this); + public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery relMetadataQuery) { + return relMetadataQuery.getNonCumulativeCost(this); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java index 4fac13e..11a0096 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java @@ -108,8 +108,8 @@ public void implement(Implementor implementor) { } @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return RelMetadataQuery.getNonCumulativeCost(this); + public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery relMetadataQuery) { + return relMetadataQuery.getNonCumulativeCost(this); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java index 5788805..a19d372 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java @@ -122,8 +122,8 @@ public HiveTableScan copy(RelDataType newRowtype) { } @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return RelMetadataQuery.getNonCumulativeCost(this); + public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery relMetadataQuery) { + return relMetadataQuery.getNonCumulativeCost(this); } @Override public RelWriter explainTerms(RelWriter pw) { @@ -146,8 +146,9 @@ public void implement(Implementor implementor) { } + //getRows will call estimateRowCount @Override - public double getRows() { + public double estimateRowCount(RelMetadataQuery mq) { return ((RelOptHiveTable) table).getRowCount(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index 070c7ea..fea7711 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -121,7 +121,7 @@ public void onMatch(RelOptRuleCall call) { // Do the columns used by the join appear in the output of the aggregate? final ImmutableBitSet aggregateColumns = aggregate.getGroupSet(); final ImmutableBitSet keyColumns = keyColumns(aggregateColumns, - RelMetadataQuery.getPulledUpPredicates(join).pulledUpPredicates); + RelMetadataQuery.instance().getPulledUpPredicates(join).pulledUpPredicates); final ImmutableBitSet joinColumns = RelOptUtil.InputFinder.bits(join.getCondition()); final boolean allColumnsInAggregate = @@ -179,7 +179,7 @@ public void onMatch(RelOptRuleCall call) { unique = true; } else { final Boolean unique0 = - RelMetadataQuery.areColumnsUnique(joinInput, belowAggregateKey); + RelMetadataQuery.instance().areColumnsUnique(joinInput, belowAggregateKey); unique = unique0 != null && unique0; } if (unique) { @@ -299,8 +299,8 @@ public Integer apply(Integer a0) { } // Make a cost based decision to pick cheaper plan - RelOptCost afterCost = RelMetadataQuery.getCumulativeCost(r); - RelOptCost beforeCost = RelMetadataQuery.getCumulativeCost(aggregate); + RelOptCost afterCost = RelMetadataQuery.instance().getCumulativeCost(r); + RelOptCost beforeCost = RelMetadataQuery.instance().getCumulativeCost(aggregate); if (afterCost.isLt(beforeCost)) { call.transformTo(r); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java index 7d7631b..7d4411a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java @@ -112,7 +112,7 @@ public void onMatch(RelOptRuleCall call) { // arguments then we can use a more efficient form. if ((nonDistinctCount == 0) && (argListSets.size() == 1)) { for (Integer arg : argListSets.iterator().next()) { - Set colOrigs = RelMetadataQuery.getColumnOrigins(aggregate, arg); + Set colOrigs = RelMetadataQuery.instance().getColumnOrigins(aggregate, arg); if (null != colOrigs) { for (RelColumnOrigin colOrig : colOrigs) { RelOptHiveTable hiveTbl = (RelOptHiveTable)colOrig.getOriginTable(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 07928d8..994af97 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -80,7 +80,7 @@ public HiveJoinPushTransitivePredicatesRule(Class clazz, public void onMatch(RelOptRuleCall call) { Join join = call.rel(0); - RelOptPredicateList preds = RelMetadataQuery.getPulledUpPredicates(join); + RelOptPredicateList preds = RelMetadataQuery.instance().getPulledUpPredicates(join); HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); assert registry != null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 50e139b..6958993 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -128,7 +128,7 @@ public FilterReduceExpressionsRule(Class filterClass, RexNode newConditionExp; boolean reduced; final RelOptPredicateList predicates = - RelMetadataQuery.getPulledUpPredicates(filter.getInput()); + RelMetadataQuery.instance().getPulledUpPredicates(filter.getInput()); if (reduceExpressions(filter, expList, predicates)) { assert expList.size() == 1; newConditionExp = expList.get(0); @@ -242,7 +242,7 @@ public boolean matches(RelOptRuleCall call) { registry.registerVisited(this, project); } final RelOptPredicateList predicates = - RelMetadataQuery.getPulledUpPredicates(project.getInput()); + RelMetadataQuery.instance().getPulledUpPredicates(project.getInput()); final List expList = Lists.newArrayList(project.getProjects()); if (reduceExpressions(project, expList, predicates)) { @@ -274,9 +274,9 @@ public JoinReduceExpressionsRule(Class joinClass, final List expList = Lists.newArrayList(join.getCondition()); final int fieldCount = join.getLeft().getRowType().getFieldCount(); final RelOptPredicateList leftPredicates = - RelMetadataQuery.getPulledUpPredicates(join.getLeft()); + RelMetadataQuery.instance().getPulledUpPredicates(join.getLeft()); final RelOptPredicateList rightPredicates = - RelMetadataQuery.getPulledUpPredicates(join.getRight()); + RelMetadataQuery.instance().getPulledUpPredicates(join.getRight()); final RelOptPredicateList predicates = leftPredicates.union(rightPredicates.shift(fieldCount)); if (!reduceExpressions(join, expList, predicates)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java index 0af60e8..2f2297d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java @@ -96,7 +96,7 @@ public boolean matches(RelOptRuleCall call) { // Finally, if we do not reduce the input size, we bail out final int offset = sortLimit.offset == null ? 0 : RexLiteral.intValue(sortLimit.offset); if (offset + RexLiteral.intValue(sortLimit.fetch) - >= RelMetadataQuery.getRowCount(reducedInput)) { + >= RelMetadataQuery.instance().getRowCount(reducedInput)) { return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java index 618c717..573b75a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java @@ -59,7 +59,7 @@ public boolean matches(RelOptRuleCall call) { // Finally, if we do not reduce the size input enough, we bail out int limit = RexLiteral.intValue(sortLimit.fetch); - Double rowCount = RelMetadataQuery.getRowCount(sortLimit.getInput()); + Double rowCount = RelMetadataQuery.instance().getRowCount(sortLimit.getInput()); if (rowCount != null && limit <= reductionProportion * rowCount && rowCount - limit >= reductionTuples) { return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java index 0ec8bf1..04b94c3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java @@ -80,7 +80,7 @@ public void onMatch(RelOptRuleCall call) { final int offset = sort.offset == null ? 0 : RexLiteral.intValue(sort.offset); for (RelNode input : union.getInputs()) { // If we do not reduce the input size, we bail out - if (RexLiteral.intValue(sort.fetch) + offset < RelMetadataQuery.getRowCount(input)) { + if (RexLiteral.intValue(sort.fetch) + offset < RelMetadataQuery.instance().getRowCount(input)) { finishPushSortPastUnion = false; // Here we do some query rewrite. We first get the new fetchRN, which is // a sum of offset and fetch. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index c04060f..b533451 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -47,7 +47,7 @@ protected FilterSelectivityEstimator(RelNode childRel) { super(true); this.childRel = childRel; - this.childCardinality = RelMetadataQuery.getRowCount(childRel); + this.childCardinality = RelMetadataQuery.instance().getRowCount(childRel); } public Double estimateSelectivity(RexNode predicate) { @@ -254,7 +254,7 @@ private Double getMaxNDV(RexCall call) { for (RexNode op : call.getOperands()) { if (op instanceof RexInputRef) { - tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, + tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, RelMetadataQuery.instance(), ((RexInputRef) op).getIndex()); if (tmpNDV > maxNDV) maxNDV = tmpNDV; @@ -262,7 +262,7 @@ private Double getMaxNDV(RexCall call) { irv = new InputReferencedVisitor(); irv.apply(op); for (Integer childProjIndx : irv.inputPosReferenced) { - tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx); + tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, RelMetadataQuery.instance(),childProjIndx); if (tmpNDV > maxNDV) maxNDV = tmpNDV; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java index 84fa518..66bc148 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java @@ -24,6 +24,7 @@ import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdCollation; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.util.BuiltInMethod; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; @@ -46,7 +47,7 @@ private HiveRelMdCollation() {} //~ Methods ---------------------------------------------------------------- - public ImmutableList collations(HiveAggregate aggregate) { + public ImmutableList collations(HiveAggregate aggregate, RelMetadataQuery relMetadataQuery) { // Compute collations ImmutableList.Builder collationListBuilder = new ImmutableList.Builder(); @@ -60,7 +61,7 @@ private HiveRelMdCollation() {} new HiveRelCollation(collationListBuilder.build()))); } - public ImmutableList collations(HiveJoin join) { + public ImmutableList collations(HiveJoin join, RelMetadataQuery relMetadataQuery) { return join.getCollation(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java index 1220401..03872b1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java @@ -58,8 +58,8 @@ private HiveRelMdDistinctRowCount() { // Catch-all rule when none of the others apply. @Override - public Double getDistinctRowCount(RelNode rel, ImmutableBitSet groupKey, - RexNode predicate) { + public Double getDistinctRowCount(RelNode rel, RelMetadataQuery relMetadataQuery, + ImmutableBitSet groupKey, RexNode predicate) { if (rel instanceof HiveTableScan) { return getDistinctRowCount((HiveTableScan) rel, groupKey, predicate); } @@ -67,7 +67,7 @@ public Double getDistinctRowCount(RelNode rel, ImmutableBitSet groupKey, * For now use Calcite' default formulas for propagating NDVs up the Query * Tree. */ - return super.getDistinctRowCount(rel, groupKey, predicate); + return super.getDistinctRowCount(rel, relMetadataQuery, groupKey, predicate); } private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey, @@ -83,39 +83,39 @@ private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey return Math.min(noDistinctRows, htRel.getRows()); } - public static Double getDistinctRowCount(RelNode r, int indx) { + public static Double getDistinctRowCount(RelNode r, RelMetadataQuery relMetadataQuery, int indx) { ImmutableBitSet bitSetOfRqdProj = ImmutableBitSet.of(indx); - return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r + return relMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r .getCluster().getRexBuilder().makeLiteral(true)); } @Override - public Double getDistinctRowCount(Join rel, ImmutableBitSet groupKey, + public Double getDistinctRowCount(Join rel, RelMetadataQuery relMetadataQuery, ImmutableBitSet groupKey, RexNode predicate) { if (rel instanceof HiveJoin) { HiveJoin hjRel = (HiveJoin) rel; //TODO: Improve this if (hjRel.isLeftSemiJoin()) { - return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey, + return relMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey, rel.getCluster().getRexBuilder().makeLiteral(true)); } else { - return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(), + return RelMdUtil.getJoinDistinctRowCount(relMetadataQuery, rel, rel.getJoinType(), groupKey, predicate, true); } } - return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate); + return relMetadataQuery.getDistinctRowCount(rel, groupKey, predicate); } /* * Favor Broad Plans over Deep Plans. */ - public RelOptCost getCumulativeCost(HiveJoin rel) { - RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel); + public RelOptCost getCumulativeCost(HiveJoin rel, RelMetadataQuery relMetadataQuery) { + RelOptCost cost = relMetadataQuery.getNonCumulativeCost(rel); List inputs = rel.getInputs(); RelOptCost maxICost = HiveCost.ZERO; for (RelNode input : inputs) { - RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input); + RelOptCost iCost = relMetadataQuery.getCumulativeCost(input); if (maxICost.isLt(iCost)) { maxICost = iCost; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java index b83f240..bd58e5b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java @@ -22,6 +22,7 @@ import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdDistribution; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.util.BuiltInMethod; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; @@ -35,8 +36,7 @@ ChainedRelMetadataProvider.of( ImmutableList.of( ReflectiveRelMetadataProvider.reflectiveSource( - BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution()), - RelMdDistribution.SOURCE)); + BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution()))); //~ Constructors ----------------------------------------------------------- @@ -44,12 +44,12 @@ private HiveRelMdDistribution() {} //~ Methods ---------------------------------------------------------------- - public RelDistribution distribution(HiveAggregate aggregate) { + public RelDistribution distribution(HiveAggregate aggregate, RelMetadataQuery relMetadataQuery) { return new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, aggregate.getGroupSet().asList()); } - public RelDistribution distribution(HiveJoin join) { + public RelDistribution distribution(HiveJoin join, RelMetadataQuery relMetadataQuery) { return join.getDistribution(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java index bea5943..8fb6e18 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java @@ -52,8 +52,8 @@ public Double memory(HiveTableScan tableScan) { } public Double memory(HiveAggregate aggregate) { - final Double avgRowSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput()); - final Double rowCount = RelMetadataQuery.getRowCount(aggregate.getInput()); + final Double avgRowSize = RelMetadataQuery.instance().getAverageRowSize(aggregate.getInput()); + final Double rowCount = RelMetadataQuery.instance().getRowCount(aggregate.getInput()); if (avgRowSize == null || rowCount == null) { return null; } @@ -79,8 +79,8 @@ public Double memory(HiveProject project) { public Double memory(HiveSortLimit sort) { if (sort.getCollation() != RelCollations.EMPTY) { // It sorts - final Double avgRowSize = RelMetadataQuery.getAverageRowSize(sort.getInput()); - final Double rowCount = RelMetadataQuery.getRowCount(sort.getInput()); + final Double avgRowSize = RelMetadataQuery.instance().getAverageRowSize(sort.getInput()); + final Double rowCount = RelMetadataQuery.instance().getRowCount(sort.getInput()); if (avgRowSize == null || rowCount == null) { return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java index 2f51d3b..1e83519 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java @@ -81,7 +81,7 @@ public Integer splitCount(HiveTableScan scan) { } public Integer splitCount(RelNode rel) { - Boolean newPhase = RelMetadataQuery.isPhaseTransition(rel); + Boolean newPhase = RelMetadataQuery.instance().isPhaseTransition(rel); if (newPhase == null) { return null; @@ -95,15 +95,15 @@ public Integer splitCount(RelNode rel) { // We do not repartition: take number of splits from children Integer splitCount = 0; for (RelNode input : rel.getInputs()) { - splitCount += RelMetadataQuery.splitCount(input); + splitCount += RelMetadataQuery.instance().splitCount(input); } return splitCount; } public Integer splitCountRepartition(RelNode rel) { // We repartition: new number of splits - final Double averageRowSize = RelMetadataQuery.getAverageRowSize(rel); - final Double rowCount = RelMetadataQuery.getRowCount(rel); + final Double averageRowSize = RelMetadataQuery.instance().getAverageRowSize(rel); + final Double rowCount = RelMetadataQuery.instance().getRowCount(rel); if (averageRowSize == null || rowCount == null) { return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index b7244fd..22fe122 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -98,11 +98,10 @@ * * */ - @Override - public RelOptPredicateList getPredicates(Project project) { + public RelOptPredicateList getPredicates(Project project, RelMetadataQuery relMetadataQuery) { RelNode child = project.getInput(); final RexBuilder rexBuilder = project.getCluster().getRexBuilder(); - RelOptPredicateList childInfo = RelMetadataQuery.getPulledUpPredicates(child); + RelOptPredicateList childInfo = relMetadataQuery.getPulledUpPredicates(child); List projectPullUpPredicates = new ArrayList(); HashMultimap inpIndxToOutIndxMap = HashMultimap.create(); @@ -150,14 +149,13 @@ public RelOptPredicateList getPredicates(Project project) { } /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */ - @Override - public RelOptPredicateList getPredicates(Join join) { + public RelOptPredicateList getPredicates(Join join, RelMetadataQuery relMetadataQuery) { RexBuilder rB = join.getCluster().getRexBuilder(); RelNode left = join.getInput(0); RelNode right = join.getInput(1); - RelOptPredicateList leftInfo = RelMetadataQuery.getPulledUpPredicates(left); - RelOptPredicateList rightInfo = RelMetadataQuery.getPulledUpPredicates(right); + RelOptPredicateList leftInfo = relMetadataQuery.getPulledUpPredicates(left); + RelOptPredicateList rightInfo = relMetadataQuery.getPulledUpPredicates(right); HiveJoinConditionBasedPredicateInference jI = new HiveJoinConditionBasedPredicateInference(join, RexUtil.composeConjunction(rB, leftInfo.pulledUpPredicates, false), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index caf8978..6dca50d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -75,7 +75,6 @@ public Double getRowCount(Join join) { return join.getRows(); } - @Override public Double getRowCount(SemiJoin rel) { PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel); if (pkfk != null) { @@ -86,12 +85,11 @@ public Double getRowCount(SemiJoin rel) { } return pkfk.fkInfo.rowCount * selectivity; } - return super.getRowCount(rel); + return super.getRowCount(rel, RelMetadataQuery.instance()); } - @Override public Double getRowCount(Sort rel) { - final Double rowCount = RelMetadataQuery.getRowCount(rel.getInput()); + final Double rowCount = RelMetadataQuery.instance().getRowCount(rel.getInput()); if (rowCount != null && rel.fetch != null) { final int offset = rel.offset == null ? 0 : RexLiteral.intValue(rel.offset); final int limit = RexLiteral.intValue(rel.fetch); @@ -237,8 +235,8 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { return null; } - double leftRowCount = RelMetadataQuery.getRowCount(left); - double rightRowCount = RelMetadataQuery.getRowCount(right); + double leftRowCount = RelMetadataQuery.instance().getRowCount(left); + double rightRowCount = RelMetadataQuery.instance().getRowCount(right); if (leftIsKey && rightIsKey) { if (rightRowCount < leftRowCount) { @@ -253,8 +251,8 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { pkSide == 0 ? left : right, pkSide == 0 ? leftColIdx : rightColIdx) : false; - double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1; - double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1; + double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.instance().getDistinctRowCount(left, lBitSet, leftPred) : -1; + double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.instance().getDistinctRowCount(right, rBitSet, rightPred) : -1; /* * If the ndv of the PK - FK side don't match, and the PK side is a filter @@ -318,7 +316,7 @@ private static double pkSelectivity(Join joinRel, boolean leftChild, } else { HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true); if (tScan != null) { - double tRowCount = RelMetadataQuery.getRowCount(tScan); + double tRowCount = RelMetadataQuery.instance().getRowCount(tScan); return childRowCount / tRowCount; } else { return 1.0; @@ -328,7 +326,7 @@ private static double pkSelectivity(Join joinRel, boolean leftChild, private static boolean isKey(ImmutableBitSet c, RelNode rel) { boolean isKey = false; - Set keys = RelMetadataQuery.getUniqueKeys(rel); + Set keys = RelMetadataQuery.instance().getUniqueKeys(rel); if (keys != null) { for (ImmutableBitSet key : keys) { if (key.equals(c)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index a0eb83d..3ee23a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -63,8 +63,8 @@ public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemant return computeInnerJoinSelectivity(j, predicate); } else if (j.getJoinType().equals(JoinRelType.LEFT) || j.getJoinType().equals(JoinRelType.RIGHT)) { - double left = RelMetadataQuery.getRowCount(j.getLeft()); - double right = RelMetadataQuery.getRowCount(j.getRight()); + double left = RelMetadataQuery.instance().getRowCount(j.getLeft()); + double right = RelMetadataQuery.instance().getRowCount(j.getRight()); double product = left * right; double innerJoinSelectivity = computeInnerJoinSelectivity(j, predicate); if (j.getJoinType().equals(JoinRelType.LEFT)) { @@ -97,14 +97,14 @@ private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws // Join which are part of join keys for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) { colStatMapBuilder.put(ljk, - HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk)); + HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), RelMetadataQuery.instance(), ljk)); } // 2. Update Col Stats Map with col stats for columns from right side of // Join which are part of join keys for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) { colStatMapBuilder.put(rjk + rightOffSet, - HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk)); + HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), RelMetadataQuery.instance(), rjk)); } colStatMap = colStatMapBuilder.build(); @@ -116,11 +116,11 @@ private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws ndvCrossProduct = exponentialBackoff(peLst, colStatMap); if (j.isLeftSemiJoin()) - ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()), + ndvCrossProduct = Math.min(RelMetadataQuery.instance().getRowCount(j.getLeft()), ndvCrossProduct); else - ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()) - * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct); + ndvCrossProduct = Math.min(RelMetadataQuery.instance().getRowCount(j.getLeft()) + * RelMetadataQuery.instance().getRowCount(j.getRight()), ndvCrossProduct); } // 4. Join Selectivity = 1/NDV diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java index 3224039..9b4c89e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java @@ -81,10 +81,10 @@ private HiveRelMdSize() {} final RelNode left = rel.getLeft(); final RelNode right = rel.getRight(); final List lefts = - RelMetadataQuery.getAverageColumnSizes(left); + RelMetadataQuery.instance().getAverageColumnSizes(left); List rights = null; if (!rel.isLeftSemiJoin()) { - rights = RelMetadataQuery.getAverageColumnSizes(right); + rights = RelMetadataQuery.instance().getAverageColumnSizes(right); } if (lefts == null && rights == null) { return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java index 7c22c33..058e886 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java @@ -34,6 +34,7 @@ import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdUniqueKeys; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.BitSets; @@ -59,12 +60,12 @@ * Inferring Uniqueness for all columns is very expensive right now. The flip * side of doing this is, it only works post Field Trimming. */ - public Set getUniqueKeys(Project rel, boolean ignoreNulls) { + public Set getUniqueKeys(Project rel, RelMetadataQuery relMetadataQuery, boolean ignoreNulls) { HiveTableScan tScan = getTableScan(rel.getInput(), false); if ( tScan == null ) { - Function fn = RelMdUniqueKeys.SOURCE.apply( + Function fn = (Function) RelMdUniqueKeys.SOURCE.apply( rel.getClass(), BuiltInMetadata.UniqueKeys.class); return ((BuiltInMetadata.UniqueKeys) fn.apply(rel)) .getUniqueKeys(ignoreNulls); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index c79b1be..9f2f679 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -432,8 +432,8 @@ OpAttr visit(HiveSortLimit sortRel) throws SemanticException { // 1.a. Extract order for each column from collation // Generate sortCols and order - ImmutableBitSet.Builder sortColsPosBuilder = new ImmutableBitSet.Builder(); - ImmutableBitSet.Builder sortOutputColsPosBuilder = new ImmutableBitSet.Builder(); + ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder(); + ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder(); Map obRefToCallMap = sortRel.getInputRefToCallMap(); List sortCols = new ArrayList(); StringBuilder order = new StringBuilder(); diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out index 27858e7..1a2e212 100644 --- a/ql/src/test/results/clientpositive/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/auto_join12.q.out @@ -29,14 +29,14 @@ STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_2:src + $hdt$_0:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_0:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -52,7 +52,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - $hdt$_0:$hdt$_2:src + $hdt$_0:$hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/auto_join13.q.out b/ql/src/test/results/clientpositive/auto_join13.q.out index fa03d2c..219d16e 100644 --- a/ql/src/test/results/clientpositive/auto_join13.q.out +++ b/ql/src/test/results/clientpositive/auto_join13.q.out @@ -32,7 +32,7 @@ STAGE PLANS: $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_2:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -41,31 +41,31 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) < 200.0) (type: boolean) + predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 UDFToDouble(_col0) (type: double) - $hdt$_0:$hdt$_1:src + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_0:$hdt$_2:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) < 100.0) (type: boolean) + predicate: (UDFToDouble(key) < 200.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + 1 UDFToDouble(_col0) (type: double) Stage: Stage-3 Map Reduce diff --git a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out index 1521a71..8871ebc 100644 --- a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out @@ -294,16 +294,16 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) Stage: Stage-9 Map Reduce @@ -312,19 +312,19 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -358,7 +358,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Stage: Stage-6 @@ -369,16 +369,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -424,16 +428,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -449,11 +457,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col2 (type: string) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -474,16 +482,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-15 Map Reduce Local Work @@ -497,16 +509,16 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) Stage: Stage-10 Map Reduce @@ -515,19 +527,19 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -545,41 +557,41 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -677,20 +689,20 @@ STAGE PLANS: Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:a + $hdt$_1:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:a + $hdt$_1:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) > 100.0) (type: boolean) + predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -704,11 +716,11 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -716,7 +728,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -733,11 +745,11 @@ STAGE PLANS: Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:a + $hdt$_2:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:a + $hdt$_2:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -750,7 +762,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Stage: Stage-6 @@ -761,16 +773,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -816,16 +832,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -841,11 +861,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -866,34 +886,38 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:a + $hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -907,11 +931,11 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) > 100.0) (type: boolean) + predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -919,7 +943,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -937,33 +961,33 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) > 100.0) (type: boolean) + predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -971,7 +995,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/cbo_const.q.out b/ql/src/test/results/clientpositive/cbo_const.q.out index d6682ca..6de92ac 100644 --- a/ql/src/test/results/clientpositive/cbo_const.q.out +++ b/ql/src/test/results/clientpositive/cbo_const.q.out @@ -233,22 +233,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) = 3.0) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '3.0' (type: string) - sort order: + - Map-reduce partition columns: '3.0' (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -259,20 +243,34 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '3.0' (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: '3.0' (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + TableScan + alias: z + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ds = '2008-04-08') and (UDFToDouble(hr) = 14.0)) and value is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2 + Statistics: Num rows: 13 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -285,42 +283,43 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: '3.0' (type: string) sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map-reduce partition columns: '3.0' (type: string) + Statistics: Num rows: 13 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) TableScan - alias: z - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ds = '2008-04-08') and (UDFToDouble(hr) = 14.0)) and value is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: (UDFToDouble(key) = 3.0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: '3.0' (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map-reduce partition columns: '3.0' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '3.0' (type: string), _col4 (type: string), _col1 (type: string) + expressions: '3.0' (type: string), _col2 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out index e210c7f..c60e09c 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out @@ -420,7 +420,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) (type: boolean) + predicate: ((UDFToDouble(key) > 20.0) and (UDFToDouble(key) < 200.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -435,7 +435,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 20.0) and (UDFToDouble(key) < 200.0)) (type: boolean) + predicate: ((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -453,19 +453,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash + Select Operator + expressions: _col1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -597,7 +601,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) (type: boolean) + predicate: ((UDFToDouble(key) > 20.0) and (UDFToDouble(key) < 200.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -612,7 +616,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 20.0) and (UDFToDouble(key) < 200.0)) (type: boolean) + predicate: ((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -649,25 +653,29 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 112 Data size: 1183 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 112 Data size: 1183 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -781,19 +789,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash + Select Operator + expressions: _col1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -989,25 +1001,29 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 165 Data size: 1752 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 165 Data size: 1752 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index 1c3a5ab..7707eeb 100644 --- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -9,12 +9,12 @@ FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AN WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -72,10 +72,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan alias: f Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -90,24 +97,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1 + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string) + expressions: _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -135,12 +135,12 @@ FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AN WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -198,10 +198,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan alias: f Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -216,24 +223,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1 + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string) + expressions: _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out index 182dd3a..a90b22a 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -379,8 +379,8 @@ InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test distribute by, should only be bucketed by key diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out index 1e4db29..83e5ce5 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out @@ -275,8 +275,8 @@ InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 16 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the diff --git a/ql/src/test/results/clientpositive/join28.q.out b/ql/src/test/results/clientpositive/join28.q.out index 9aadd28..f266a97 100644 --- a/ql/src/test/results/clientpositive/join28.q.out +++ b/ql/src/test/results/clientpositive/join28.q.out @@ -31,54 +31,54 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_0:$hdt$_1:x Fetch Operator limit: -1 - $hdt$_1:$hdt$_2:x + $hdt$_1:z Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:$hdt$_1:x TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_1:$hdt$_2:x + $hdt$_1:z TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -109,10 +109,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col0, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -136,7 +136,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out index 5cb124b..ed18570 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -112,7 +112,7 @@ STAGE PLANS: $hdt$_0:y Fetch Operator limit: -1 - $hdt$_2:x + $hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -131,10 +131,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - $hdt$_2:x + $hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -149,9 +149,9 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 Stage: Stage-6 Map Reduce @@ -172,22 +172,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col0, _col2, _col6 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -356,7 +356,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_2:z] Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/join32_lessSize.q.out b/ql/src/test/results/clientpositive/join32_lessSize.q.out index 544e814..ebe5d82 100644 --- a/ql/src/test/results/clientpositive/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/join32_lessSize.q.out @@ -119,11 +119,11 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:x + $hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:x + $hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -138,9 +138,9 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 Stage: Stage-7 Map Reduce @@ -161,10 +161,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -175,7 +175,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 + columns _col0,_col2 columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -279,7 +279,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_2:z] Stage: Stage-8 Map Reduce Local Work @@ -303,7 +303,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 @@ -316,13 +316,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col0, _col2, _col6 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -362,7 +362,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 + columns _col0,_col2 columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -371,7 +371,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 + columns _col0,_col2 columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -666,18 +666,18 @@ STAGE PLANS: Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:x + $hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:x + $hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -698,7 +698,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -710,7 +710,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Position of Big Table: 1 Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -722,7 +722,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col3 + columns _col1,_col2,_col3 columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -780,16 +780,16 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_3:x] + /src1 [$hdt$_2:x] Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:w + $hdt$_0:w Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:w + $hdt$_0:w TableScan alias: w Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -804,7 +804,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) Position of Big Table: 0 @@ -817,9 +817,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -831,7 +831,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 + columns _col1,_col2 columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -850,7 +850,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col3 + columns _col1,_col2,_col3 columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -859,7 +859,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col3 + columns _col1,_col2,_col3 columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -914,11 +914,11 @@ STAGE PLANS: Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:w + $hdt$_3:w Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:w + $hdt$_3:w TableScan alias: w Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -933,7 +933,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Position of Big Table: 0 @@ -946,13 +946,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col1, _col2, _col6 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col6 (type: string) + expressions: _col2 (type: string), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -997,7 +997,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 + columns _col1,_col2 columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -1006,7 +1006,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 + columns _col1,_col2 columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -1308,22 +1308,22 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-7 Stage-6 depends on stages: Stage-8 - Stage-7 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1342,7 +1342,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -1481,12 +1481,12 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /src [$hdt$_0:$hdt$_0:y] - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_1:z Fetch Operator limit: -1 Partition Description: @@ -1536,7 +1536,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_1:z TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1551,11 +1551,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 1 + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1564,13 +1564,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - Position of Big Table: 1 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1695,7 +1695,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### @@ -1920,22 +1920,22 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-7 Stage-6 depends on stages: Stage-8 - Stage-7 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:y + $hdt$_0:$hdt$_0:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:y + $hdt$_0:$hdt$_0:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1950,7 +1950,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -2089,12 +2089,12 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_1:$hdt$_2:x] + /src1 [$hdt$_0:$hdt$_1:x] - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_1:z Fetch Operator limit: -1 Partition Description: @@ -2144,7 +2144,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_1:z TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2159,11 +2159,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 1 + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2172,13 +2172,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - Position of Big Table: 1 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2313,7 +2313,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### @@ -2458,22 +2458,22 @@ FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res JOIN srcpart x ON (res.value = x.value and x.ds='2008-04-08' and x.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-7 Stage-6 depends on stages: Stage-8 - Stage-7 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -2489,7 +2489,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -2523,14 +2523,14 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:x + $hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:x + $hdt$_1:x TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2543,10 +2543,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string) + 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2554,12 +2554,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2583,7 +2583,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 @@ -2715,22 +2715,22 @@ FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res JOIN srcpart y ON (res.value = y.value and y.ds='2008-04-08' and y.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-7 Stage-6 depends on stages: Stage-8 - Stage-7 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -2746,7 +2746,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -2780,14 +2780,14 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:y + $hdt$_1:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:y + $hdt$_1:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2800,10 +2800,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string) + 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2811,12 +2811,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2840,7 +2840,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index 5cb124b..ed18570 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -112,7 +112,7 @@ STAGE PLANS: $hdt$_0:y Fetch Operator limit: -1 - $hdt$_2:x + $hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -131,10 +131,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - $hdt$_2:x + $hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -149,9 +149,9 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 Stage: Stage-6 Map Reduce @@ -172,22 +172,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col0, _col2, _col6 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -356,7 +356,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_2:z] Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/join_parse.q.out b/ql/src/test/results/clientpositive/join_parse.q.out index ab657cc..5cabcbd 100644 --- a/ql/src/test/results/clientpositive/join_parse.q.out +++ b/ql/src/test/results/clientpositive/join_parse.q.out @@ -433,9 +433,9 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator @@ -443,11 +443,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col4 + 1 _col2 (type: string) + outputColumnNames: _col0, _col5 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -465,33 +465,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and key is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -499,7 +499,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 + outputColumnNames: _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/limit_join_transpose.q.out b/ql/src/test/results/clientpositive/limit_join_transpose.q.out index 759aebb..b84ca3f 100644 --- a/ql/src/test/results/clientpositive/limit_join_transpose.q.out +++ b/ql/src/test/results/clientpositive/limit_join_transpose.q.out @@ -306,13 +306,14 @@ on src1.key = src2.key limit 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -345,7 +346,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -377,23 +378,43 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -407,29 +428,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1234,13 +1258,14 @@ on src1.key = src2.key limit 1 offset 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1275,7 +1300,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1307,23 +1332,45 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1337,30 +1384,33 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Offset of rows: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out b/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out index adacc33..9933bcc 100644 --- a/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out @@ -160,18 +160,17 @@ STAGE PLANS: alias: x Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: llap Map 6 Map Operator Tree: @@ -179,17 +178,18 @@ STAGE PLANS: alias: x Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap Map 7 Map Operator Tree: @@ -237,10 +237,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index 6193580..140d453 100644 --- a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -64,47 +64,47 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:src + $hdt$_1:src1 Fetch Operator limit: -1 - $hdt$_2:src1 + $hdt$_2:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:src + $hdt$_1:src1 TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - $hdt$_2:src1 + $hdt$_2:src TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Position of Big Table: 0 @@ -441,10 +441,10 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:srcpart] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:srcpart] Stage: Stage-0 Fetch Operator @@ -467,29 +467,14 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:src + $hdt$_1:src1 Fetch Operator limit: -1 - $hdt$_2:src1 + $hdt$_2:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value > 'val_450') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - $hdt$_2:src1 + $hdt$_1:src1 TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -504,6 +489,21 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + $hdt$_2:src + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value > 'val_450') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) Stage: Stage-5 Map Reduce diff --git a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out index cb6d92d..377690a 100644 --- a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out @@ -19,53 +19,53 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-4 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_0:$hdt$_1:x Fetch Operator limit: -1 - $hdt$_1:$hdt$_2:x + $hdt$_1:z Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:$hdt$_1:x TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_1:$hdt$_2:x + $hdt$_1:z TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -96,10 +96,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col0, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -262,53 +262,53 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-4 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_0:$hdt$_1:x Fetch Operator limit: -1 - $hdt$_1:$hdt$_2:x + $hdt$_1:z Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:$hdt$_1:x TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_1:$hdt$_2:x + $hdt$_1:z TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -339,10 +339,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col0, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/mergejoins.q.out b/ql/src/test/results/clientpositive/mergejoins.q.out index eb3ad8a..f74b6d8 100644 --- a/ql/src/test/results/clientpositive/mergejoins.q.out +++ b/ql/src/test/results/clientpositive/mergejoins.q.out @@ -249,11 +249,11 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 + Left Outer Join0 to 1 + Inner Join 0 to 2 filter predicates: - 0 - 1 {(UDFToDouble(KEY.reducesinkkey0) < UDFToDouble(10))} + 0 {(UDFToDouble(KEY.reducesinkkey0) < UDFToDouble(10))} + 1 2 keys: 0 _col0 (type: string) @@ -261,13 +261,17 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out index af627bf..ea7974f 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -27,12 +27,12 @@ ON src1.c1 = src3.c5 WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -89,10 +89,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -107,13 +114,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -121,10 +121,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col3 + outputColumnNames: _col0, _col2 Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1767,12 +1767,12 @@ ON src1.c1 = src3.c5 WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1829,10 +1829,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1847,13 +1854,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1861,10 +1861,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col3 + outputColumnNames: _col0, _col2 Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out index 4d89aee..9daa6d8 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out @@ -38,22 +38,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) (type: boolean) + predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -69,40 +70,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -402,22 +398,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) (type: boolean) + predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -433,40 +430,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out index 68e51a7..aaf4676 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out @@ -163,35 +163,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Map 5 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 6 Map Operator Tree: TableScan @@ -215,31 +215,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col2 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 4 Reduce Operator Tree: Select Operator @@ -337,35 +341,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) > 100.0) (type: boolean) + predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 6 Map Operator Tree: TableScan @@ -391,29 +395,33 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 4 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/join28.q.out b/ql/src/test/results/clientpositive/spark/join28.q.out index 548a806..c535132 100644 --- a/ql/src/test/results/clientpositive/spark/join28.q.out +++ b/ql/src/test/results/clientpositive/spark/join28.q.out @@ -41,18 +41,18 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -62,15 +62,15 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -82,7 +82,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -102,7 +102,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -114,12 +114,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col0, _col2 input vertices: - 0 Map 1 + 1 Map 3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index 3771217..f7ff361 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ -110,7 +110,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -126,9 +126,9 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -196,7 +196,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Local Work: @@ -255,7 +255,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: z @@ -273,26 +273,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 input vertices: - 1 Map 2 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col0, _col2, _col6 input vertices: 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index c4ac2dd..c908948 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -118,7 +118,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -134,9 +134,9 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -204,7 +204,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Local Work: @@ -263,7 +263,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: z @@ -281,26 +281,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 input vertices: - 1 Map 2 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col0, _col2, _col6 input vertices: 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -632,7 +632,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -707,7 +707,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -719,14 +719,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) Position of Big Table: 1 Local Work: @@ -796,7 +796,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Local Work: @@ -873,9 +873,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 1 Position of Big Table: 1 @@ -884,15 +884,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col1, _col2, _col6 input vertices: 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col6 (type: string) + expressions: _col2 (type: string), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1238,25 +1238,25 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 1 + 1 _col0 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -1264,12 +1264,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 @@ -1277,56 +1274,57 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src1 [x] Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Local Work: @@ -1336,9 +1334,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 @@ -1346,46 +1347,45 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -1407,7 +1407,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1418,15 +1418,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 input vertices: - 0 Map 1 - Position of Big Table: 1 + 1 Map 3 + Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1767,22 +1767,18 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 1 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -1790,12 +1786,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 @@ -1803,13 +1796,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1819,38 +1810,45 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - Map 2 + /src [y] + Map 3 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 1 + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -1858,9 +1856,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 @@ -1868,11 +1869,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1882,32 +1885,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: x @@ -1929,7 +1929,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1940,15 +1940,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 input vertices: - 0 Map 1 - Position of Big Table: 1 + 1 Map 3 + Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2216,39 +2216,39 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -2257,7 +2257,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -2277,7 +2277,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2287,14 +2287,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 input vertices: - 0 Map 1 + 1 Map 3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2460,39 +2460,39 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -2501,7 +2501,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -2521,7 +2521,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2531,14 +2531,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 input vertices: - 0 Map 1 + 1 Map 3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 3771217..f7ff361 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -110,7 +110,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -126,9 +126,9 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -196,7 +196,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Local Work: @@ -255,7 +255,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: z @@ -273,26 +273,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 input vertices: - 1 Map 2 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col0, _col2, _col6 input vertices: 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string) + expressions: _col0 (type: string), _col2 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out index b348472..c4b61bc 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out @@ -28,18 +28,18 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -49,15 +49,15 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -69,7 +69,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -89,7 +89,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -101,12 +101,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col0, _col2 input vertices: - 0 Map 1 + 1 Map 3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -278,18 +278,18 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -299,15 +299,15 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -319,7 +319,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -339,7 +339,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -351,12 +351,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col0, _col2 input vertices: - 0 Map 1 + 1 Map 3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/mergejoins.q.out b/ql/src/test/results/clientpositive/spark/mergejoins.q.out index 3f7ec0e..6ac4675 100644 --- a/ql/src/test/results/clientpositive/spark/mergejoins.q.out +++ b/ql/src/test/results/clientpositive/spark/mergejoins.q.out @@ -263,11 +263,11 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 + Left Outer Join0 to 1 + Inner Join 0 to 2 filter predicates: - 0 - 1 {(UDFToDouble(KEY.reducesinkkey0) < UDFToDouble(10))} + 0 {(UDFToDouble(KEY.reducesinkkey0) < UDFToDouble(10))} + 1 2 keys: 0 _col0 (type: string) @@ -275,13 +275,17 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out index 7ce8f9e..38b99bf 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -34,8 +34,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -44,7 +44,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key <> '13') and (key <> '11')) and (key < '400')) and (key <> '12')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + predicate: ((((((((key <> '11') and (key < '400')) and (key <> '12')) and (key <> '13')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -55,41 +55,41 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '11') and (key < '400')) and (key <> '12')) and (key <> '13')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + predicate: (((((((key <> '12') and (key <> '11')) and (key < '400')) and (key <> '13')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key <> '12') and (key <> '11')) and (key < '400')) and (key <> '13')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) + predicate: (((((((key <> '13') and (key <> '11')) and (key < '400')) and (key <> '12')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -98,27 +98,6 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -134,6 +113,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1774,8 +1774,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1784,7 +1784,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key <> '13') and (key <> '11')) and (key < '400')) and (key <> '12')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + predicate: ((((((((key <> '11') and (key < '400')) and (key <> '12')) and (key <> '13')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -1795,41 +1795,41 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '11') and (key < '400')) and (key <> '12')) and (key <> '13')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + predicate: (((((((key <> '12') and (key <> '11')) and (key < '400')) and (key <> '13')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key <> '12') and (key <> '11')) and (key < '400')) and (key <> '13')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) + predicate: (((((((key <> '13') and (key <> '11')) and (key < '400')) and (key <> '12')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -1838,27 +1838,6 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1874,6 +1853,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out index 87580e9..cf21646 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out @@ -43,24 +43,25 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) (type: boolean) + predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -78,41 +79,36 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -417,24 +413,25 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) (type: boolean) + predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -452,41 +449,36 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out index d8eb6c8..2251de5 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -193,7 +193,7 @@ Stage-0 Select Operator [SEL_15] (rows=605 width=10) Output:["_col0","_col1","_col2"] Merge Join Operator [MERGEJOIN_26] (rows=605 width=10) - Conds:RS_12._col3=RS_13._col0(Inner),Output:["_col0","_col3","_col6"] + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0","_col2","_col6"] <-Map 5 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 @@ -205,27 +205,27 @@ Stage-0 default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] - PartitionCols:_col3 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_25] (rows=550 width=10) - Conds:RS_9._col0=RS_10._col1(Inner),Output:["_col0","_col3"] + Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col2"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_22] (rows=500 width=10) - predicate:value is not null - TableScan [TS_0] (rows=500 width=10) - default@srcpart,z,Tbl:COMPLETE,Col:NONE,Output:["value"] - <-Map 4 [SIMPLE_EDGE] - SHUFFLE [RS_10] PartitionCols:_col1 - Select Operator [SEL_5] (rows=25 width=7) + Select Operator [SEL_2] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=25 width=7) + Filter Operator [FIL_22] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_0] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_23] (rows=500 width=10) + predicate:value is not null + TableScan [TS_3] (rows=500 width=10) + default@srcpart,z,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: EXPLAIN select @@ -286,145 +286,143 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Map 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 17 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 - File Output Operator [FS_55] - Limit [LIM_54] (rows=100 width=10) + Reducer 10 + File Output Operator [FS_53] + Limit [LIM_52] (rows=100 width=10) Number of rows:100 - Select Operator [SEL_53] (rows=805 width=10) + Select Operator [SEL_51] (rows=550 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] - Select Operator [SEL_51] (rows=805 width=10) + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_50] + Select Operator [SEL_49] (rows=550 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_50] (rows=805 width=10) + Group By Operator [GBY_48] (rows=550 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=1610 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col2, _col12, _col20 - Select Operator [SEL_47] (rows=1610 width=10) - Output:["_col2","_col12","_col20","_col13","_col21","_col3"] - Merge Join Operator [MERGEJOIN_97] (rows=1610 width=10) - Conds:RS_44._col1, _col3=RS_45._col15, _col17(Inner),Output:["_col2","_col3","_col12","_col13","_col20","_col21"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col15, _col17 - Select Operator [SEL_40] (rows=1464 width=10) - Output:["_col14","_col15","_col17","_col6","_col7"] - Merge Join Operator [MERGEJOIN_96] (rows=1464 width=10) - Conds:RS_37._col4, _col6=RS_38._col2, _col4(Inner),Output:["_col2","_col3","_col14","_col15","_col17"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col4, _col6 - Merge Join Operator [MERGEJOIN_94] (rows=1331 width=10) - Conds:RS_34._col3=RS_35._col1(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col1 - Select Operator [SEL_17] (rows=12 width=7) - Output:["_col1"] - Filter Operator [FIL_88] (rows=12 width=7) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_93] (rows=1210 width=10) - Conds:RS_31._col2=RS_32._col0(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_87] (rows=250 width=10) - predicate:((value = 'd1value') and key is not null) - TableScan [TS_12] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_92] (rows=1100 width=10) - Conds:RS_28._col1=RS_29._col3(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col3 - Select Operator [SEL_11] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_86] (rows=42 width=34) - predicate:((((((v3 = 'ssv3') and k2 is not null) and k3 is not null) and k1 is not null) and v1 is not null) and v2 is not null) - TableScan [TS_9] (rows=85 width=34) - default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=1000 width=10) - Output:["_col1"] - Filter Operator [FIL_85] (rows=1000 width=10) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_6] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col2, _col4 - Merge Join Operator [MERGEJOIN_95] (rows=275 width=10) - Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=42 width=34) - Output:["_col0","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_89] (rows=42 width=34) - predicate:((((((v1 = 'srv1') and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) and k1 is not null) - TableScan [TS_18] (rows=85 width=34) - default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_90] (rows=250 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_21] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_91] (rows=275 width=10) - Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=170 width=34) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_83] (rows=170 width=34) - predicate:((v2 is not null and v3 is not null) and k1 is not null) - TableScan [TS_0] (rows=170 width=34) - default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_42] + Group By Operator [GBY_46] (rows=1100 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col1)","count(_col9)","count(_col17)"],keys:_col16, _col0, _col8 + Select Operator [SEL_45] (rows=1100 width=10) + Output:["_col16","_col0","_col8","_col1","_col9","_col17"] + Merge Join Operator [MERGEJOIN_103] (rows=1100 width=10) + Conds:RS_42._col3=RS_43._col1(Inner),Output:["_col0","_col1","_col8","_col9","_col16","_col17"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col1 + Select Operator [SEL_23] (rows=1000 width=10) + Output:["_col1"] + Filter Operator [FIL_96] (rows=1000 width=10) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_21] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_102] (rows=332 width=10) + Conds:RS_39._col14=RS_40._col0(Inner),Output:["_col0","_col1","_col3","_col8","_col9","_col16","_col17"] + <-Map 16 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col0 - Select Operator [SEL_5] (rows=250 width=10) + Select Operator [SEL_20] (rows=250 width=10) Output:["_col0"] - Filter Operator [FIL_84] (rows=250 width=10) + Filter Operator [FIL_95] (rows=250 width=10) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=10) + TableScan [TS_18] (rows=500 width=10) default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col14 + Merge Join Operator [MERGEJOIN_101] (rows=302 width=10) + Conds:RS_36._col6=RS_37._col0(Inner),Output:["_col0","_col1","_col3","_col8","_col9","_col14","_col16","_col17"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_94] (rows=250 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_15] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_100] (rows=275 width=10) + Conds:RS_33._col0=RS_34._col0(Inner),Output:["_col0","_col1","_col3","_col6","_col8","_col9","_col14","_col16","_col17"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_93] (rows=250 width=10) + predicate:((value = 'd1value') and key is not null) + TableScan [TS_12] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_99] (rows=187 width=34) + Conds:RS_30._col9, _col11=RS_31._col1, _col3(Inner),Output:["_col0","_col1","_col3","_col6","_col8","_col9","_col14","_col16","_col17"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col1, _col3 + Select Operator [SEL_11] (rows=170 width=34) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_92] (rows=170 width=34) + predicate:((v2 is not null and v3 is not null) and k1 is not null) + TableScan [TS_9] (rows=170 width=34) + default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col9, _col11 + Merge Join Operator [MERGEJOIN_98] (rows=50 width=35) + Conds:RS_27._col1=RS_28._col1(Inner),Output:["_col0","_col1","_col3","_col6","_col8","_col9","_col11"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=12 width=7) + Output:["_col1"] + Filter Operator [FIL_91] (rows=12 width=7) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_6] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_97] (rows=46 width=34) + Conds:RS_24._col2, _col4=RS_25._col2, _col4(Inner),Output:["_col0","_col1","_col3","_col6","_col8","_col9","_col11"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col2, _col4 + Select Operator [SEL_2] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_89] (rows=42 width=34) + predicate:((((((v3 = 'ssv3') and k2 is not null) and k3 is not null) and k1 is not null) and v1 is not null) and v2 is not null) + TableScan [TS_0] (rows=85 width=34) + default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col2, _col4 + Select Operator [SEL_5] (rows=42 width=34) + Output:["_col0","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_90] (rows=42 width=34) + predicate:((((((v1 = 'srv1') and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) and k1 is not null) + TableScan [TS_3] (rows=85 width=34) + default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -447,151 +445,151 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Union 2 (CONTAINS) -Map 11 <- Union 12 (CONTAINS) -Map 16 <- Union 12 (CONTAINS) -Map 8 <- Union 2 (CONTAINS) -Reducer 13 <- Union 12 (SIMPLE_EDGE) -Reducer 14 <- Map 17 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) +Map 10 <- Union 8 (CONTAINS) +Map 15 <- Union 16 (CONTAINS) +Map 18 <- Union 16 (CONTAINS) +Map 7 <- Union 8 (CONTAINS) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 17 <- Union 16 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 9 <- Union 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 + Reducer 5 File Output Operator [FS_59] - Group By Operator [GBY_57] (rows=550 width=10) + Group By Operator [GBY_57] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 13 [CONTAINS] Reduce Output Operator [RS_56] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=1100 width=10) + Group By Operator [GBY_55] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_51] (rows=550 width=10) + Select Operator [SEL_51] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_85] (rows=550 width=10) - Conds:RS_48._col2=RS_49._col0(Inner),Output:["_col1","_col2"] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_44] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_81] (rows=500 width=10) - predicate:key is not null - TableScan [TS_42] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Reducer 14 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_83] (rows=605 width=10) + Conds:RS_48._col2=RS_49._col1(Inner),Output:["_col1","_col4"] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_48] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_84] (rows=288 width=10) - Conds:RS_45._col1=RS_46._col1(Inner),Output:["_col1","_col2"] - <-Map 17 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_81] (rows=550 width=10) + Conds:RS_45._col0=RS_46._col0(Inner),Output:["_col1","_col2"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_76] (rows=500 width=10) + predicate:key is not null + TableScan [TS_26] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_46] - PartitionCols:_col1 - Select Operator [SEL_41] (rows=25 width=7) + PartitionCols:_col0 + Select Operator [SEL_31] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_80] (rows=25 width=7) + Filter Operator [FIL_77] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_39] (rows=25 width=7) + TableScan [TS_29] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col1 - Select Operator [SEL_38] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_37] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 12 [SIMPLE_EDGE] - <-Map 11 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_28] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_78] (rows=25 width=7) - predicate:value is not null - TableScan [TS_26] (rows=25 width=7) - Output:["key","value"] - <-Map 16 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_31] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_79] (rows=500 width=10) - predicate:value is not null - TableScan [TS_29] (rows=500 width=10) - Output:["key","value"] - <-Reducer 5 [CONTAINS] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col1 + Select Operator [SEL_44] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_43] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 16 [SIMPLE_EDGE] + <-Map 15 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_34] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_78] (rows=25 width=7) + predicate:value is not null + TableScan [TS_32] (rows=25 width=7) + Output:["key","value"] + <-Map 18 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_37] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_79] (rows=500 width=10) + predicate:value is not null + TableScan [TS_35] (rows=500 width=10) + Output:["key","value"] + <-Reducer 3 [CONTAINS] Reduce Output Operator [RS_56] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=1100 width=10) + Group By Operator [GBY_55] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=550 width=10) + Select Operator [SEL_25] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_83] (rows=550 width=10) - Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col1","_col2"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_77] (rows=500 width=10) - predicate:key is not null - TableScan [TS_16] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Reducer 4 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_82] (rows=605 width=10) + Conds:RS_22._col2=RS_23._col1(Inner),Output:["_col1","_col4"] + <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_82] (rows=288 width=10) - Conds:RS_19._col1=RS_20._col1(Inner),Output:["_col1","_col2"] - <-Map 9 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_80] (rows=550 width=10) + Conds:RS_19._col0=RS_20._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_72] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 6 [SIMPLE_EDGE] SHUFFLE [RS_20] - PartitionCols:_col1 - Select Operator [SEL_15] (rows=25 width=7) + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_76] (rows=25 width=7) + Filter Operator [FIL_73] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_13] (rows=25 width=7) + TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col1 - Select Operator [SEL_12] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_11] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_74] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 8 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_75] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["key","value"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col1 + Select Operator [SEL_18] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_17] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 8 [SIMPLE_EDGE] + <-Map 10 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_11] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_75] (rows=500 width=10) + predicate:value is not null + TableScan [TS_9] (rows=500 width=10) + Output:["key","value"] + <-Map 7 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_8] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_74] (rows=25 width=7) + predicate:value is not null + TableScan [TS_6] (rows=25 width=7) + Output:["key","value"] PREHOOK: query: explain SELECT x.key, y.value @@ -622,290 +620,290 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Union 2 (CONTAINS) -Map 10 <- Union 2 (CONTAINS) -Map 13 <- Union 14 (CONTAINS) -Map 20 <- Union 14 (CONTAINS) -Map 21 <- Union 16 (CONTAINS) -Map 24 <- Union 25 (CONTAINS) -Map 33 <- Union 25 (CONTAINS) -Map 34 <- Union 27 (CONTAINS) +Map 12 <- Union 10 (CONTAINS) +Map 17 <- Union 18 (CONTAINS) +Map 22 <- Union 18 (CONTAINS) +Map 23 <- Union 20 (CONTAINS) +Map 28 <- Union 29 (CONTAINS) Map 35 <- Union 29 (CONTAINS) -Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS) -Reducer 17 <- Union 16 (SIMPLE_EDGE) -Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 23 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 26 <- Union 25 (SIMPLE_EDGE), Union 27 (CONTAINS) -Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 30 <- Union 29 (SIMPLE_EDGE) -Reducer 31 <- Map 36 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 32 <- Map 37 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 9 <- Union 8 (SIMPLE_EDGE) +Map 36 <- Union 31 (CONTAINS) +Map 37 <- Union 33 (CONTAINS) +Map 9 <- Union 10 (CONTAINS) +Reducer 11 <- Union 10 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 19 <- Union 18 (SIMPLE_EDGE), Union 20 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 21 <- Union 20 (SIMPLE_EDGE) +Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 32 <- Union 31 (SIMPLE_EDGE), Union 33 (CONTAINS) +Reducer 34 <- Union 33 (SIMPLE_EDGE) +Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 + Reducer 7 File Output Operator [FS_122] - Group By Operator [GBY_120] (rows=550 width=10) + Group By Operator [GBY_120] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 8 [SIMPLE_EDGE] - <-Reducer 32 [CONTAINS] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 26 [CONTAINS] Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1100 width=10) + Group By Operator [GBY_118] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_114] (rows=550 width=10) + Select Operator [SEL_114] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_170] (rows=550 width=10) - Conds:RS_111._col2=RS_112._col0(Inner),Output:["_col2","_col5"] - <-Map 37 [SIMPLE_EDGE] - SHUFFLE [RS_112] - PartitionCols:_col0 - Select Operator [SEL_107] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=500 width=10) - predicate:key is not null - TableScan [TS_105] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 31 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_164] (rows=605 width=10) + Conds:RS_111._col3=RS_112._col1(Inner),Output:["_col1","_col2"] + <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_111] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_169] (rows=484 width=10) - Conds:RS_108._col1=RS_109._col1(Inner),Output:["_col2"] - <-Map 36 [SIMPLE_EDGE] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_161] (rows=550 width=10) + Conds:RS_108._col0=RS_109._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 24 [SIMPLE_EDGE] + SHUFFLE [RS_108] + PartitionCols:_col0 + Select Operator [SEL_71] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_153] (rows=500 width=10) + predicate:key is not null + TableScan [TS_69] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 27 [SIMPLE_EDGE] SHUFFLE [RS_109] - PartitionCols:_col1 - Select Operator [SEL_104] (rows=25 width=7) + PartitionCols:_col0 + Select Operator [SEL_74] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=25 width=7) + Filter Operator [FIL_154] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_102] (rows=25 width=7) + TableScan [TS_72] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_108] - PartitionCols:_col1 - Select Operator [SEL_101] (rows=440 width=10) - Output:["_col1"] - Group By Operator [GBY_100] (rows=440 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 29 [SIMPLE_EDGE] - <-Map 35 [CONTAINS] - Reduce Output Operator [RS_99] - PartitionCols:_col0, _col1 - Group By Operator [GBY_98] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_94] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_162] (rows=500 width=10) - predicate:value is not null - TableScan [TS_92] (rows=500 width=10) - Output:["key","value"] - <-Reducer 28 [CONTAINS] - Reduce Output Operator [RS_99] - PartitionCols:_col0, _col1 - Group By Operator [GBY_98] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_90] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 27 [SIMPLE_EDGE] - <-Map 34 [CONTAINS] - Reduce Output Operator [RS_89] - PartitionCols:_col0, _col1 - Group By Operator [GBY_88] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_84] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_161] (rows=500 width=10) - predicate:value is not null - TableScan [TS_82] (rows=500 width=10) - Output:["key","value"] - <-Reducer 26 [CONTAINS] - Reduce Output Operator [RS_89] - PartitionCols:_col0, _col1 - Group By Operator [GBY_88] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_80] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 25 [SIMPLE_EDGE] - <-Map 24 [CONTAINS] - Reduce Output Operator [RS_79] - PartitionCols:_col0, _col1 - Group By Operator [GBY_78] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_71] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=25 width=7) - predicate:value is not null - TableScan [TS_69] (rows=25 width=7) - Output:["key","value"] - <-Map 33 [CONTAINS] - Reduce Output Operator [RS_79] - PartitionCols:_col0, _col1 - Group By Operator [GBY_78] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_74] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=500 width=10) - predicate:value is not null - TableScan [TS_72] (rows=500 width=10) - Output:["key","value"] - <-Reducer 7 [CONTAINS] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_112] + PartitionCols:_col1 + Select Operator [SEL_107] (rows=440 width=10) + Output:["_col1"] + Group By Operator [GBY_106] (rows=440 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 33 [SIMPLE_EDGE] + <-Map 37 [CONTAINS] + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_100] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_158] (rows=500 width=10) + predicate:value is not null + TableScan [TS_98] (rows=500 width=10) + Output:["key","value"] + <-Reducer 32 [CONTAINS] + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_96] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 31 [SIMPLE_EDGE] + <-Map 36 [CONTAINS] + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_90] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500 width=10) + predicate:value is not null + TableScan [TS_88] (rows=500 width=10) + Output:["key","value"] + <-Reducer 30 [CONTAINS] + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_86] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 29 [SIMPLE_EDGE] + <-Map 28 [CONTAINS] + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_77] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=25 width=7) + predicate:value is not null + TableScan [TS_75] (rows=25 width=7) + Output:["key","value"] + <-Map 35 [CONTAINS] + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_80] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=500 width=10) + predicate:value is not null + TableScan [TS_78] (rows=500 width=10) + Output:["key","value"] + <-Reducer 5 [CONTAINS] Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1100 width=10) + Group By Operator [GBY_118] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_67] (rows=550 width=10) + Group By Operator [GBY_67] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 19 [CONTAINS] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 15 [CONTAINS] Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1100 width=10) + Group By Operator [GBY_65] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_61] (rows=550 width=10) + Select Operator [SEL_61] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_168] (rows=550 width=10) - Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=500 width=10) - predicate:key is not null - TableScan [TS_52] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 18 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_163] (rows=605 width=10) + Conds:RS_58._col3=RS_59._col1(Inner),Output:["_col1","_col2"] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_58] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_167] (rows=419 width=10) - Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] - <-Map 22 [SIMPLE_EDGE] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_160] (rows=550 width=10) + Conds:RS_55._col0=RS_56._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_148] (rows=500 width=10) + predicate:key is not null + TableScan [TS_26] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_56] - PartitionCols:_col1 - Select Operator [SEL_51] (rows=25 width=7) + PartitionCols:_col0 + Select Operator [SEL_31] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=25 width=7) + Filter Operator [FIL_149] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_49] (rows=25 width=7) + TableScan [TS_29] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=381 width=10) - Output:["_col1"] - Group By Operator [GBY_47] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 16 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_41] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=500 width=10) - predicate:value is not null - TableScan [TS_39] (rows=500 width=10) - Output:["key","value"] - <-Reducer 15 [CONTAINS] - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_37] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_28] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=25 width=7) - predicate:value is not null - TableScan [TS_26] (rows=25 width=7) - Output:["key","value"] - <-Map 20 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_31] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=500 width=10) - predicate:value is not null - TableScan [TS_29] (rows=500 width=10) - Output:["key","value"] - <-Reducer 5 [CONTAINS] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col1 + Select Operator [SEL_54] (rows=381 width=10) + Output:["_col1"] + Group By Operator [GBY_53] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 20 [SIMPLE_EDGE] + <-Map 23 [CONTAINS] + Reduce Output Operator [RS_52] + PartitionCols:_col0, _col1 + Group By Operator [GBY_51] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_47] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_152] (rows=500 width=10) + predicate:value is not null + TableScan [TS_45] (rows=500 width=10) + Output:["key","value"] + <-Reducer 19 [CONTAINS] + Reduce Output Operator [RS_52] + PartitionCols:_col0, _col1 + Group By Operator [GBY_51] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_43] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 18 [SIMPLE_EDGE] + <-Map 17 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_34] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_150] (rows=25 width=7) + predicate:value is not null + TableScan [TS_32] (rows=25 width=7) + Output:["key","value"] + <-Map 22 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_37] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_151] (rows=500 width=10) + predicate:value is not null + TableScan [TS_35] (rows=500 width=10) + Output:["key","value"] + <-Reducer 3 [CONTAINS] Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1100 width=10) + Group By Operator [GBY_65] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=550 width=10) + Select Operator [SEL_25] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_166] (rows=550 width=10) - Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] - <-Map 12 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_162] (rows=605 width=10) + Conds:RS_22._col3=RS_23._col1(Inner),Output:["_col1","_col2"] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_23] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_153] (rows=500 width=10) - predicate:key is not null - TableScan [TS_16] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 4 [SIMPLE_EDGE] + PartitionCols:_col1 + Select Operator [SEL_18] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_17] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 10 [SIMPLE_EDGE] + <-Map 12 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_11] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_147] (rows=500 width=10) + predicate:value is not null + TableScan [TS_9] (rows=500 width=10) + Output:["key","value"] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_8] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_146] (rows=25 width=7) + predicate:value is not null + TableScan [TS_6] (rows=25 width=7) + Output:["key","value"] + <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_22] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_165] (rows=288 width=10) - Conds:RS_19._col1=RS_20._col1(Inner),Output:["_col2"] - <-Map 11 [SIMPLE_EDGE] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_159] (rows=550 width=10) + Conds:RS_19._col0=RS_20._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_144] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 8 [SIMPLE_EDGE] SHUFFLE [RS_20] - PartitionCols:_col1 - Select Operator [SEL_15] (rows=25 width=7) + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=25 width=7) + Filter Operator [FIL_145] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_13] (rows=25 width=7) + TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col1 - Select Operator [SEL_12] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_11] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 10 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["key","value"] PREHOOK: query: EXPLAIN SELECT x.key, z.value, y.value @@ -920,18 +918,18 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Map 1 + Map 2 File Output Operator [FS_16] Select Operator [SEL_15] (rows=605 width=10) Output:["_col0","_col1","_col2"] Map Join Operator [MAPJOIN_26] (rows=605 width=10) - Conds:MAPJOIN_25._col3=RS_13._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col3","_col6"] + Conds:MAPJOIN_25._col0=RS_13._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col2","_col6"] <-Map 3 [BROADCAST_EDGE] BROADCAST [RS_13] PartitionCols:_col0 @@ -942,21 +940,21 @@ Stage-0 TableScan [TS_6] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Map Join Operator [MAPJOIN_25] (rows=550 width=10) - Conds:SEL_2._col0=RS_10._col1(Inner),HybridGraceHashJoin:true,Output:["_col0","_col3"] - <-Map 2 [BROADCAST_EDGE] - BROADCAST [RS_10] + Conds:RS_9._col1=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col2"] + <-Map 1 [BROADCAST_EDGE] + BROADCAST [RS_9] PartitionCols:_col1 - Select Operator [SEL_5] (rows=25 width=7) + Select Operator [SEL_2] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=25 width=7) + Filter Operator [FIL_22] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_0] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=500 width=10) + <-Select Operator [SEL_5] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_22] (rows=500 width=10) + Filter Operator [FIL_23] (rows=500 width=10) predicate:value is not null - TableScan [TS_0] (rows=500 width=10) + TableScan [TS_3] (rows=500 width=10) default@srcpart,z,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: EXPLAIN @@ -1018,120 +1016,117 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE) -Map 10 <- Map 9 (BROADCAST_EDGE) -Map 3 <- Map 1 (BROADCAST_EDGE), Map 10 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) -Reducer 4 <- Map 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) +Map 8 <- Map 1 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 - File Output Operator [FS_55] - Limit [LIM_54] (rows=100 width=10) + Reducer 10 + File Output Operator [FS_53] + Limit [LIM_52] (rows=100 width=10) Number of rows:100 - Select Operator [SEL_53] (rows=805 width=10) + Select Operator [SEL_51] (rows=550 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] - Select Operator [SEL_51] (rows=805 width=10) + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_50] + Select Operator [SEL_49] (rows=550 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_50] (rows=805 width=10) + Group By Operator [GBY_48] (rows=550 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=1610 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col2, _col12, _col20 - Select Operator [SEL_47] (rows=1610 width=10) - Output:["_col2","_col12","_col20","_col13","_col21","_col3"] - Map Join Operator [MAPJOIN_97] (rows=1610 width=10) - Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col12","_col13","_col20","_col21"] + Group By Operator [GBY_46] (rows=1100 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col1)","count(_col9)","count(_col17)"],keys:_col16, _col0, _col8 + Select Operator [SEL_45] (rows=1100 width=10) + Output:["_col16","_col0","_col8","_col1","_col9","_col17"] + Map Join Operator [MAPJOIN_103] (rows=1100 width=10) + Conds:RS_42._col3=SEL_23._col1(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col8","_col9","_col16","_col17"] <-Map 1 [BROADCAST_EDGE] - BROADCAST [RS_44] - PartitionCols:_col1, _col3 - Map Join Operator [MAPJOIN_91] (rows=275 width=10) - Conds:SEL_2._col0=RS_42._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] - <-Map 2 [BROADCAST_EDGE] - BROADCAST [RS_42] + BROADCAST [RS_42] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_102] (rows=332 width=10) + Conds:MAPJOIN_101._col14=RS_40._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3","_col8","_col9","_col16","_col17"] + <-Map 7 [BROADCAST_EDGE] + BROADCAST [RS_40] PartitionCols:_col0 - Select Operator [SEL_5] (rows=250 width=10) + Select Operator [SEL_20] (rows=250 width=10) Output:["_col0"] - Filter Operator [FIL_84] (rows=250 width=10) + Filter Operator [FIL_95] (rows=250 width=10) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=10) + TableScan [TS_18] (rows=500 width=10) default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=170 width=34) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_83] (rows=170 width=34) - predicate:((v2 is not null and v3 is not null) and k1 is not null) - TableScan [TS_0] (rows=170 width=34) - default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] - <-Select Operator [SEL_40] (rows=1464 width=10) - Output:["_col14","_col15","_col17","_col6","_col7"] - Map Join Operator [MAPJOIN_96] (rows=1464 width=10) - Conds:MAPJOIN_94._col4, _col6=RS_38._col2, _col4(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col14","_col15","_col17"] - <-Map 10 [BROADCAST_EDGE] - BROADCAST [RS_38] - PartitionCols:_col2, _col4 - Map Join Operator [MAPJOIN_95] (rows=275 width=10) - Conds:RS_24._col0=SEL_23._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col5"] - <-Map 9 [BROADCAST_EDGE] - BROADCAST [RS_24] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=42 width=34) - Output:["_col0","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_89] (rows=42 width=34) - predicate:((((((v1 = 'srv1') and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) and k1 is not null) - TableScan [TS_18] (rows=85 width=34) - default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_23] (rows=250 width=10) + <-Map Join Operator [MAPJOIN_101] (rows=302 width=10) + Conds:MAPJOIN_100._col6=RS_37._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3","_col8","_col9","_col14","_col16","_col17"] + <-Map 6 [BROADCAST_EDGE] + BROADCAST [RS_37] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=250 width=10) Output:["_col0"] - Filter Operator [FIL_90] (rows=250 width=10) + Filter Operator [FIL_94] (rows=250 width=10) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_21] (rows=500 width=10) + TableScan [TS_15] (rows=500 width=10) default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_94] (rows=1331 width=10) - Conds:MAPJOIN_93._col3=RS_35._col1(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] - <-Map 8 [BROADCAST_EDGE] - BROADCAST [RS_35] - PartitionCols:_col1 - Select Operator [SEL_17] (rows=12 width=7) - Output:["_col1"] - Filter Operator [FIL_88] (rows=12 width=7) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_93] (rows=1210 width=10) - Conds:MAPJOIN_92._col2=RS_32._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] - <-Map 7 [BROADCAST_EDGE] - BROADCAST [RS_32] + <-Map Join Operator [MAPJOIN_100] (rows=275 width=10) + Conds:MAPJOIN_99._col0=RS_34._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3","_col6","_col8","_col9","_col14","_col16","_col17"] + <-Map 5 [BROADCAST_EDGE] + BROADCAST [RS_34] PartitionCols:_col0 Select Operator [SEL_14] (rows=250 width=10) Output:["_col0"] - Filter Operator [FIL_87] (rows=250 width=10) + Filter Operator [FIL_93] (rows=250 width=10) predicate:((value = 'd1value') and key is not null) TableScan [TS_12] (rows=500 width=10) default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_92] (rows=1100 width=10) - Conds:SEL_8._col1=RS_29._col3(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] - <-Map 6 [BROADCAST_EDGE] - BROADCAST [RS_29] - PartitionCols:_col3 - Select Operator [SEL_11] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_86] (rows=42 width=34) - predicate:((((((v3 = 'ssv3') and k2 is not null) and k3 is not null) and k1 is not null) and v1 is not null) and v2 is not null) - TableScan [TS_9] (rows=85 width=34) - default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_8] (rows=1000 width=10) - Output:["_col1"] - Filter Operator [FIL_85] (rows=1000 width=10) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_6] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_99] (rows=187 width=34) + Conds:MAPJOIN_98._col9, _col11=RS_31._col1, _col3(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3","_col6","_col8","_col9","_col14","_col16","_col17"] + <-Map 4 [BROADCAST_EDGE] + BROADCAST [RS_31] + PartitionCols:_col1, _col3 + Select Operator [SEL_11] (rows=170 width=34) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_92] (rows=170 width=34) + predicate:((v2 is not null and v3 is not null) and k1 is not null) + TableScan [TS_9] (rows=170 width=34) + default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] + <-Map Join Operator [MAPJOIN_98] (rows=50 width=35) + Conds:MAPJOIN_97._col1=RS_28._col1(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3","_col6","_col8","_col9","_col11"] + <-Map 3 [BROADCAST_EDGE] + BROADCAST [RS_28] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=12 width=7) + Output:["_col1"] + Filter Operator [FIL_91] (rows=12 width=7) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_6] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_97] (rows=46 width=34) + Conds:SEL_2._col2, _col4=RS_25._col2, _col4(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3","_col6","_col8","_col9","_col11"] + <-Map 2 [BROADCAST_EDGE] + BROADCAST [RS_25] + PartitionCols:_col2, _col4 + Select Operator [SEL_5] (rows=42 width=34) + Output:["_col0","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_90] (rows=42 width=34) + predicate:((((((v1 = 'srv1') and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) and k1 is not null) + TableScan [TS_3] (rows=85 width=34) + default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_2] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_89] (rows=42 width=34) + predicate:((((((v3 = 'ssv3') and k2 is not null) and k3 is not null) and k1 is not null) and v1 is not null) and v2 is not null) + TableScan [TS_0] (rows=85 width=34) + default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_23] (rows=1000 width=10) + Output:["_col1"] + Filter Operator [FIL_96] (rows=1000 width=10) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_21] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -1154,135 +1149,137 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Union 2 (CONTAINS) -Map 12 <- Union 10 (CONTAINS) -Map 6 <- Union 2 (CONTAINS) -Map 9 <- Union 10 (CONTAINS) -Reducer 11 <- Map 13 (BROADCAST_EDGE), Map 14 (BROADCAST_EDGE), Union 10 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 3 <- Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Union 4 (SIMPLE_EDGE) +Map 1 <- Map 2 (BROADCAST_EDGE) +Map 11 <- Union 12 (CONTAINS) +Map 14 <- Union 12 (CONTAINS) +Map 3 <- Union 4 (CONTAINS) +Map 8 <- Union 4 (CONTAINS) +Map 9 <- Map 10 (BROADCAST_EDGE) +Reducer 13 <- Map 9 (BROADCAST_EDGE), Union 12 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 5 + Reducer 7 File Output Operator [FS_59] - Group By Operator [GBY_57] (rows=550 width=10) + Group By Operator [GBY_57] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 13 [CONTAINS] Reduce Output Operator [RS_56] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=1100 width=10) + Group By Operator [GBY_55] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_51] (rows=550 width=10) + Select Operator [SEL_51] (rows=605 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_85] (rows=550 width=10) - Conds:MAPJOIN_84._col2=RS_49._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] - <-Map 14 [BROADCAST_EDGE] - BROADCAST [RS_49] - PartitionCols:_col0 - Select Operator [SEL_44] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_81] (rows=500 width=10) - predicate:key is not null - TableScan [TS_42] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map Join Operator [MAPJOIN_84] (rows=288 width=10) - Conds:SEL_38._col1=RS_46._col1(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] - <-Map 13 [BROADCAST_EDGE] - BROADCAST [RS_46] - PartitionCols:_col1 - Select Operator [SEL_41] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_80] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_39] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_38] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_37] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 10 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_31] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_79] (rows=500 width=10) - predicate:value is not null - TableScan [TS_29] (rows=500 width=10) - Output:["key","value"] - <-Map 9 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_28] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_78] (rows=25 width=7) - predicate:value is not null - TableScan [TS_26] (rows=25 width=7) - Output:["key","value"] - <-Reducer 3 [CONTAINS] + Map Join Operator [MAPJOIN_83] (rows=605 width=10) + Conds:RS_48._col2=SEL_44._col1(Inner),HybridGraceHashJoin:true,Output:["_col1","_col4"] + <-Map 9 [BROADCAST_EDGE] + BROADCAST [RS_48] + PartitionCols:_col2 + Map Join Operator [MAPJOIN_81] (rows=550 width=10) + Conds:SEL_28._col0=RS_46._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] + <-Map 10 [BROADCAST_EDGE] + BROADCAST [RS_46] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_77] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_29] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_28] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_76] (rows=500 width=10) + predicate:key is not null + TableScan [TS_26] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_44] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_43] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 12 [SIMPLE_EDGE] + <-Map 11 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_34] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_78] (rows=25 width=7) + predicate:value is not null + TableScan [TS_32] (rows=25 width=7) + Output:["key","value"] + <-Map 14 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_37] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_79] (rows=500 width=10) + predicate:value is not null + TableScan [TS_35] (rows=500 width=10) + Output:["key","value"] + <-Reducer 5 [CONTAINS] Reduce Output Operator [RS_56] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=1100 width=10) + Group By Operator [GBY_55] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=550 width=10) + Select Operator [SEL_25] (rows=605 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_83] (rows=550 width=10) - Conds:MAPJOIN_82._col2=RS_23._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] - <-Map 8 [BROADCAST_EDGE] - BROADCAST [RS_23] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_77] (rows=500 width=10) - predicate:key is not null - TableScan [TS_16] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map Join Operator [MAPJOIN_82] (rows=288 width=10) - Conds:SEL_12._col1=RS_20._col1(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] - <-Map 7 [BROADCAST_EDGE] - BROADCAST [RS_20] - PartitionCols:_col1 - Select Operator [SEL_15] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_76] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_13] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_12] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_11] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_74] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_75] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["key","value"] + Map Join Operator [MAPJOIN_82] (rows=605 width=10) + Conds:RS_22._col2=SEL_18._col1(Inner),HybridGraceHashJoin:true,Output:["_col1","_col4"] + <-Map 1 [BROADCAST_EDGE] + BROADCAST [RS_22] + PartitionCols:_col2 + Map Join Operator [MAPJOIN_80] (rows=550 width=10) + Conds:SEL_2._col0=RS_20._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] + <-Map 2 [BROADCAST_EDGE] + BROADCAST [RS_20] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_73] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_72] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_18] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_17] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 4 [SIMPLE_EDGE] + <-Map 3 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_8] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_74] (rows=25 width=7) + predicate:value is not null + TableScan [TS_6] (rows=25 width=7) + Output:["key","value"] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_11] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_75] (rows=500 width=10) + predicate:value is not null + TableScan [TS_9] (rows=500 width=10) + Output:["key","value"] PREHOOK: query: explain SELECT x.key, y.value @@ -1313,266 +1310,269 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Union 2 (CONTAINS) -Map 11 <- Union 12 (CONTAINS) -Map 16 <- Union 12 (CONTAINS) -Map 17 <- Union 14 (CONTAINS) -Map 20 <- Union 21 (CONTAINS) -Map 27 <- Union 21 (CONTAINS) -Map 28 <- Union 23 (CONTAINS) -Map 29 <- Union 25 (CONTAINS) -Map 8 <- Union 2 (CONTAINS) -Reducer 13 <- Union 12 (SIMPLE_EDGE), Union 14 (CONTAINS) -Reducer 15 <- Map 18 (BROADCAST_EDGE), Map 19 (BROADCAST_EDGE), Union 14 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS) +Map 1 <- Map 2 (BROADCAST_EDGE) +Map 10 <- Union 4 (CONTAINS) +Map 11 <- Map 12 (BROADCAST_EDGE) +Map 13 <- Union 14 (CONTAINS) +Map 18 <- Union 14 (CONTAINS) +Map 19 <- Union 16 (CONTAINS) +Map 20 <- Map 21 (BROADCAST_EDGE) +Map 22 <- Union 23 (CONTAINS) +Map 29 <- Union 23 (CONTAINS) +Map 3 <- Union 4 (CONTAINS) +Map 30 <- Union 25 (CONTAINS) +Map 31 <- Union 27 (CONTAINS) +Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS) +Reducer 17 <- Map 11 (BROADCAST_EDGE), Union 16 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 24 <- Union 23 (SIMPLE_EDGE), Union 25 (CONTAINS) -Reducer 26 <- Map 30 (BROADCAST_EDGE), Map 31 (BROADCAST_EDGE), Union 25 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 3 <- Map 10 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 26 <- Union 25 (SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 28 <- Map 20 (BROADCAST_EDGE), Union 27 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 9 <- Union 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 + Reducer 9 File Output Operator [FS_122] - Group By Operator [GBY_120] (rows=550 width=10) + Group By Operator [GBY_120] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 26 [CONTAINS] + <-Union 8 [SIMPLE_EDGE] + <-Reducer 28 [CONTAINS] Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1100 width=10) + Group By Operator [GBY_118] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_114] (rows=550 width=10) + Select Operator [SEL_114] (rows=605 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_170] (rows=550 width=10) - Conds:MAPJOIN_169._col2=RS_112._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col5"] - <-Map 31 [BROADCAST_EDGE] - BROADCAST [RS_112] - PartitionCols:_col0 - Select Operator [SEL_107] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=500 width=10) - predicate:key is not null - TableScan [TS_105] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_169] (rows=484 width=10) - Conds:SEL_101._col1=RS_109._col1(Inner),HybridGraceHashJoin:true,Output:["_col2"] - <-Map 30 [BROADCAST_EDGE] - BROADCAST [RS_109] - PartitionCols:_col1 - Select Operator [SEL_104] (rows=25 width=7) + Map Join Operator [MAPJOIN_164] (rows=605 width=10) + Conds:RS_111._col3=SEL_107._col1(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] + <-Map 20 [BROADCAST_EDGE] + BROADCAST [RS_111] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_161] (rows=550 width=10) + Conds:SEL_71._col0=RS_109._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] + <-Map 21 [BROADCAST_EDGE] + BROADCAST [RS_109] + PartitionCols:_col0 + Select Operator [SEL_74] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_72] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_71] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_102] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_101] (rows=440 width=10) - Output:["_col1"] - Group By Operator [GBY_100] (rows=440 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 25 [SIMPLE_EDGE] - <-Map 29 [CONTAINS] - Reduce Output Operator [RS_99] - PartitionCols:_col0, _col1 - Group By Operator [GBY_98] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_94] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_162] (rows=500 width=10) - predicate:value is not null - TableScan [TS_92] (rows=500 width=10) - Output:["key","value"] - <-Reducer 24 [CONTAINS] - Reduce Output Operator [RS_99] - PartitionCols:_col0, _col1 - Group By Operator [GBY_98] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_90] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 23 [SIMPLE_EDGE] - <-Map 28 [CONTAINS] - Reduce Output Operator [RS_89] - PartitionCols:_col0, _col1 - Group By Operator [GBY_88] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_84] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_161] (rows=500 width=10) - predicate:value is not null - TableScan [TS_82] (rows=500 width=10) - Output:["key","value"] - <-Reducer 22 [CONTAINS] - Reduce Output Operator [RS_89] - PartitionCols:_col0, _col1 - Group By Operator [GBY_88] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_80] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 21 [SIMPLE_EDGE] - <-Map 20 [CONTAINS] - Reduce Output Operator [RS_79] - PartitionCols:_col0, _col1 - Group By Operator [GBY_78] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_71] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=25 width=7) - predicate:value is not null - TableScan [TS_69] (rows=25 width=7) - Output:["key","value"] - <-Map 27 [CONTAINS] - Reduce Output Operator [RS_79] - PartitionCols:_col0, _col1 - Group By Operator [GBY_78] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_74] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=500 width=10) - predicate:value is not null - TableScan [TS_72] (rows=500 width=10) - Output:["key","value"] - <-Reducer 5 [CONTAINS] + Filter Operator [FIL_153] (rows=500 width=10) + predicate:key is not null + TableScan [TS_69] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_107] (rows=440 width=10) + Output:["_col1"] + Group By Operator [GBY_106] (rows=440 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 27 [SIMPLE_EDGE] + <-Map 31 [CONTAINS] + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_100] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_158] (rows=500 width=10) + predicate:value is not null + TableScan [TS_98] (rows=500 width=10) + Output:["key","value"] + <-Reducer 26 [CONTAINS] + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_96] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 25 [SIMPLE_EDGE] + <-Map 30 [CONTAINS] + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_90] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500 width=10) + predicate:value is not null + TableScan [TS_88] (rows=500 width=10) + Output:["key","value"] + <-Reducer 24 [CONTAINS] + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_86] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_77] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=25 width=7) + predicate:value is not null + TableScan [TS_75] (rows=25 width=7) + Output:["key","value"] + <-Map 29 [CONTAINS] + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_80] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=500 width=10) + predicate:value is not null + TableScan [TS_78] (rows=500 width=10) + Output:["key","value"] + <-Reducer 7 [CONTAINS] Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1100 width=10) + Group By Operator [GBY_118] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_67] (rows=550 width=10) + Group By Operator [GBY_67] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 17 [CONTAINS] Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1100 width=10) + Group By Operator [GBY_65] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_61] (rows=550 width=10) + Select Operator [SEL_61] (rows=605 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_168] (rows=550 width=10) - Conds:MAPJOIN_167._col2=RS_59._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col5"] - <-Map 19 [BROADCAST_EDGE] - BROADCAST [RS_59] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=500 width=10) - predicate:key is not null - TableScan [TS_52] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_167] (rows=419 width=10) - Conds:SEL_48._col1=RS_56._col1(Inner),HybridGraceHashJoin:true,Output:["_col2"] - <-Map 18 [BROADCAST_EDGE] - BROADCAST [RS_56] - PartitionCols:_col1 - Select Operator [SEL_51] (rows=25 width=7) + Map Join Operator [MAPJOIN_163] (rows=605 width=10) + Conds:RS_58._col3=SEL_54._col1(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] + <-Map 11 [BROADCAST_EDGE] + BROADCAST [RS_58] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_160] (rows=550 width=10) + Conds:SEL_28._col0=RS_56._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] + <-Map 12 [BROADCAST_EDGE] + BROADCAST [RS_56] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_149] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_29] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_28] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_49] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_48] (rows=381 width=10) - Output:["_col1"] - Group By Operator [GBY_47] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 17 [CONTAINS] - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_41] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=500 width=10) - predicate:value is not null - TableScan [TS_39] (rows=500 width=10) - Output:["key","value"] - <-Reducer 13 [CONTAINS] - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_37] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 12 [SIMPLE_EDGE] - <-Map 11 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_28] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=25 width=7) - predicate:value is not null - TableScan [TS_26] (rows=25 width=7) - Output:["key","value"] - <-Map 16 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_31] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=500 width=10) - predicate:value is not null - TableScan [TS_29] (rows=500 width=10) - Output:["key","value"] - <-Reducer 3 [CONTAINS] + Filter Operator [FIL_148] (rows=500 width=10) + predicate:key is not null + TableScan [TS_26] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_54] (rows=381 width=10) + Output:["_col1"] + Group By Operator [GBY_53] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 16 [SIMPLE_EDGE] + <-Map 19 [CONTAINS] + Reduce Output Operator [RS_52] + PartitionCols:_col0, _col1 + Group By Operator [GBY_51] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_47] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_152] (rows=500 width=10) + predicate:value is not null + TableScan [TS_45] (rows=500 width=10) + Output:["key","value"] + <-Reducer 15 [CONTAINS] + Reduce Output Operator [RS_52] + PartitionCols:_col0, _col1 + Group By Operator [GBY_51] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_43] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_34] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_150] (rows=25 width=7) + predicate:value is not null + TableScan [TS_32] (rows=25 width=7) + Output:["key","value"] + <-Map 18 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_37] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_151] (rows=500 width=10) + predicate:value is not null + TableScan [TS_35] (rows=500 width=10) + Output:["key","value"] + <-Reducer 5 [CONTAINS] Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1100 width=10) + Group By Operator [GBY_65] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=550 width=10) + Select Operator [SEL_25] (rows=605 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_166] (rows=550 width=10) - Conds:MAPJOIN_165._col2=RS_23._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col5"] - <-Map 10 [BROADCAST_EDGE] - BROADCAST [RS_23] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_153] (rows=500 width=10) - predicate:key is not null - TableScan [TS_16] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_165] (rows=288 width=10) - Conds:SEL_12._col1=RS_20._col1(Inner),HybridGraceHashJoin:true,Output:["_col2"] - <-Map 9 [BROADCAST_EDGE] - BROADCAST [RS_20] - PartitionCols:_col1 - Select Operator [SEL_15] (rows=25 width=7) + Map Join Operator [MAPJOIN_162] (rows=605 width=10) + Conds:RS_22._col3=SEL_18._col1(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] + <-Map 1 [BROADCAST_EDGE] + BROADCAST [RS_22] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_159] (rows=550 width=10) + Conds:SEL_2._col0=RS_20._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] + <-Map 2 [BROADCAST_EDGE] + BROADCAST [RS_20] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_145] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_13] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_12] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_11] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 8 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["key","value"] + Filter Operator [FIL_144] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_18] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_17] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 4 [SIMPLE_EDGE] + <-Map 10 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_11] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_147] (rows=500 width=10) + predicate:value is not null + TableScan [TS_9] (rows=500 width=10) + Output:["key","value"] + <-Map 3 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_8] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_146] (rows=25 width=7) + predicate:value is not null + TableScan [TS_6] (rows=25 width=7) + Output:["key","value"] PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -2047,78 +2047,81 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 12 <- Union 9 (CONTAINS) - Map 13 <- Union 9 (CONTAINS) + Map 1 <- Map 4 (BROADCAST_EDGE) + Map 12 <- Union 13 (CONTAINS) + Map 14 <- Union 13 (CONTAINS) + Map 15 <- Union 13 (CONTAINS) Map 16 <- Map 17 (BROADCAST_EDGE) - Map 18 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 19 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 20 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 21 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 8 <- Union 9 (CONTAINS) - Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) - Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) + Map 18 <- Map 16 (BROADCAST_EDGE), Union 3 (CONTAINS) + Map 19 <- Map 16 (BROADCAST_EDGE), Union 3 (CONTAINS) + Map 20 <- Map 16 (BROADCAST_EDGE), Union 3 (CONTAINS) + Map 21 <- Map 16 (BROADCAST_EDGE), Union 3 (CONTAINS) + Map 5 <- Union 6 (CONTAINS) + Map 7 <- Union 6 (CONTAINS) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 13 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 6 (SIMPLE_EDGE) + Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 6 - Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE + 1 Map 4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE - Map 12 + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Map 11 Map Operator Tree: TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE - Map 13 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 12 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -2130,43 +2133,41 @@ STAGE PLANS: alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE Map 15 Map Operator Tree: TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE Map 16 Map Operator Tree: TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -2178,42 +2179,42 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 17 Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) Map 17 Map Operator Tree: TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -2241,20 +2242,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 16 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Select Operator - expressions: _col0 (type: string), _col3 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2275,20 +2276,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 16 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Select Operator - expressions: _col0 (type: string), _col3 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2309,20 +2310,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 16 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Select Operator - expressions: _col0 (type: string), _col3 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2343,53 +2344,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 input vertices: 0 Map 16 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Select Operator - expressions: _col0 (type: string), _col3 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 5 - Map Operator Tree: - TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 6 - Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 4 Map Operator Tree: TableScan alias: x @@ -2402,115 +2375,127 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 5503 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 525 Data size: 5503 Basic stats: COMPLETE Column stats: NONE Map 8 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 10 Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - Statistics: Num rows: 1239 Data size: 13085 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col4 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1239 Data size: 13085 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 + Reducer 2 Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - Statistics: Num rows: 634 Data size: 6658 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col4 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 634 Data size: 6658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - Union 4 - Vertex: Union 4 - Union 9 - Vertex: Union 9 + Reducer 9 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Union 13 + Vertex: Union 13 + Union 3 + Vertex: Union 3 + Union 6 + Vertex: Union 6 Stage: Stage-0 Fetch Operator @@ -2547,64 +2532,65 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Union 2 (CONTAINS) -Map 12 <- Union 13 (CONTAINS) -Map 19 <- Union 13 (CONTAINS) -Map 20 <- Union 15 (CONTAINS) +Map 1 <- Map 7 (BROADCAST_EDGE) +Map 11 <- Union 9 (CONTAINS) +Map 16 <- Union 17 (CONTAINS) +Map 21 <- Union 17 (CONTAINS) +Map 22 <- Union 19 (CONTAINS) Map 23 <- Map 24 (BROADCAST_EDGE) Map 25 <- Union 26 (CONTAINS) Map 32 <- Union 26 (CONTAINS) Map 33 <- Union 28 (CONTAINS) Map 34 <- Union 30 (CONTAINS) -Map 9 <- Union 2 (CONTAINS) -Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS) -Reducer 16 <- Union 15 (SIMPLE_EDGE) -Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 8 <- Union 9 (CONTAINS) +Reducer 10 <- Union 9 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 18 <- Union 17 (SIMPLE_EDGE), Union 19 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 20 <- Union 19 (SIMPLE_EDGE) Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) Reducer 29 <- Union 28 (SIMPLE_EDGE), Union 30 (CONTAINS) -Reducer 3 <- Map 10 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 31 <- Map 23 (BROADCAST_EDGE), Union 30 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 31 <- Map 23 (BROADCAST_EDGE), Union 30 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 + Reducer 6 File Output Operator [FS_122] - Group By Operator [GBY_120] (rows=530 width=10) + Group By Operator [GBY_120] (rows=544 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 7 [SIMPLE_EDGE] + <-Union 5 [SIMPLE_EDGE] <-Reducer 31 [CONTAINS] Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1061 width=10) + Group By Operator [GBY_118] (rows=1089 width=10) Output:["_col0","_col1"],keys:_col0, _col1 Select Operator [SEL_114] (rows=484 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_167] (rows=484 width=10) - Conds:RS_111._col1=SEL_107._col1(Inner),HybridGraceHashJoin:true,Output:["_col0","_col3"] + Map Join Operator [MAPJOIN_164] (rows=484 width=10) + Conds:RS_111._col3=SEL_107._col1(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2"] <-Map 23 [BROADCAST_EDGE] BROADCAST [RS_111] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_166] (rows=27 width=7) - Conds:SEL_71._col0=RS_109._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_161] (rows=27 width=7) + Conds:SEL_71._col0=RS_109._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] <-Map 24 [BROADCAST_EDGE] BROADCAST [RS_109] PartitionCols:_col0 Select Operator [SEL_74] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=25 width=7) - predicate:key is not null + Filter Operator [FIL_154] (rows=25 width=7) + predicate:(key is not null and value is not null) TableScan [TS_72] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Select Operator [SEL_71] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=25 width=7) - predicate:(key is not null and value is not null) + Filter Operator [FIL_153] (rows=25 width=7) + predicate:key is not null TableScan [TS_69] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Select Operator [SEL_107] (rows=440 width=10) @@ -2619,7 +2605,7 @@ Stage-0 Output:["_col0","_col1"],keys:_col0, _col1 Select Operator [SEL_100] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_161] (rows=500 width=10) + Filter Operator [FIL_158] (rows=500 width=10) predicate:value is not null TableScan [TS_98] (rows=500 width=10) Output:["key","value"] @@ -2638,7 +2624,7 @@ Stage-0 Output:["_col0","_col1"],keys:_col0, _col1 Select Operator [SEL_90] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=500 width=10) + Filter Operator [FIL_157] (rows=500 width=10) predicate:value is not null TableScan [TS_88] (rows=500 width=10) Output:["key","value"] @@ -2657,7 +2643,7 @@ Stage-0 Output:["_col0","_col1"],keys:_col0, _col1 Select Operator [SEL_77] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=25 width=7) + Filter Operator [FIL_155] (rows=25 width=7) predicate:value is not null TableScan [TS_75] (rows=25 width=7) Output:["key","value"] @@ -2668,158 +2654,158 @@ Stage-0 Output:["_col0","_col1"],keys:_col0, _col1 Select Operator [SEL_80] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=500 width=10) + Filter Operator [FIL_156] (rows=500 width=10) predicate:value is not null TableScan [TS_78] (rows=500 width=10) Output:["key","value"] - <-Reducer 6 [CONTAINS] + <-Reducer 4 [CONTAINS] Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1061 width=10) + Group By Operator [GBY_118] (rows=1089 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_67] (rows=577 width=10) + Group By Operator [GBY_67] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] + <-Union 3 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1155 width=10) + Group By Operator [GBY_65] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 Select Operator [SEL_61] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_165] (rows=605 width=10) - Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] - <-Map 22 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=500 width=10) - predicate:key is not null - TableScan [TS_52] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 17 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_163] (rows=605 width=10) + Conds:RS_58._col3=RS_59._col1(Inner),Output:["_col1","_col2"] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_58] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_164] (rows=550 width=10) - Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] - <-Map 21 [SIMPLE_EDGE] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_160] (rows=550 width=10) + Conds:RS_55._col0=RS_56._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_148] (rows=500 width=10) + predicate:key is not null + TableScan [TS_26] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_56] - PartitionCols:_col1 - Select Operator [SEL_51] (rows=500 width=10) + PartitionCols:_col0 + Select Operator [SEL_31] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=500 width=10) + Filter Operator [FIL_149] (rows=500 width=10) predicate:(key is not null and value is not null) - TableScan [TS_49] (rows=500 width=10) + TableScan [TS_29] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=381 width=10) - Output:["_col1"] - Group By Operator [GBY_47] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 15 [SIMPLE_EDGE] - <-Map 20 [CONTAINS] - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_41] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_153] (rows=500 width=10) - predicate:value is not null - TableScan [TS_39] (rows=500 width=10) - Output:["key","value"] - <-Reducer 14 [CONTAINS] - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_37] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 13 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_28] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=25 width=7) - predicate:value is not null - TableScan [TS_26] (rows=25 width=7) - Output:["key","value"] - <-Map 19 [CONTAINS] - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_31] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=500 width=10) - predicate:value is not null - TableScan [TS_29] (rows=500 width=10) - Output:["key","value"] - <-Reducer 4 [CONTAINS] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col1 + Select Operator [SEL_54] (rows=381 width=10) + Output:["_col1"] + Group By Operator [GBY_53] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 19 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] + Reduce Output Operator [RS_52] + PartitionCols:_col0, _col1 + Group By Operator [GBY_51] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_47] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_152] (rows=500 width=10) + predicate:value is not null + TableScan [TS_45] (rows=500 width=10) + Output:["key","value"] + <-Reducer 18 [CONTAINS] + Reduce Output Operator [RS_52] + PartitionCols:_col0, _col1 + Group By Operator [GBY_51] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_43] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_34] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_150] (rows=25 width=7) + predicate:value is not null + TableScan [TS_32] (rows=25 width=7) + Output:["key","value"] + <-Map 21 [CONTAINS] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Group By Operator [GBY_41] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_37] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_151] (rows=500 width=10) + predicate:value is not null + TableScan [TS_35] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1155 width=10) + Group By Operator [GBY_65] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=550 width=10) + Select Operator [SEL_25] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_163] (rows=550 width=10) - Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=500 width=10) - predicate:key is not null - TableScan [TS_16] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_162] (rows=605 width=10) + Conds:RS_22._col3=RS_23._col1(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_22] - PartitionCols:_col2 - Map Join Operator [MAPJOIN_162] (rows=288 width=10) - Conds:SEL_12._col1=RS_20._col1(Inner),HybridGraceHashJoin:true,Output:["_col2"] - <-Map 10 [BROADCAST_EDGE] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_159] (rows=550 width=10) + Conds:SEL_2._col0=RS_20._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] + <-Map 7 [BROADCAST_EDGE] BROADCAST [RS_20] - PartitionCols:_col1 - Select Operator [SEL_15] (rows=25 width=7) + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=25 width=7) + Filter Operator [FIL_145] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_13] (rows=25 width=7) + TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_12] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_11] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_147] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 9 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_148] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["key","value"] + <-Select Operator [SEL_2] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_144] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col1 + Select Operator [SEL_18] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_17] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 9 [SIMPLE_EDGE] + <-Map 11 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_11] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_147] (rows=500 width=10) + predicate:value is not null + TableScan [TS_9] (rows=500 width=10) + Output:["key","value"] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_8] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_146] (rows=25 width=7) + predicate:value is not null + TableScan [TS_6] (rows=25 width=7) + Output:["key","value"] PREHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out b/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out index 0ee1917..f3de646 100644 --- a/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out @@ -158,35 +158,35 @@ STAGE PLANS: alias: x Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 6 Map Operator Tree: TableScan alias: x Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 7 Map Operator Tree: TableScan @@ -230,10 +230,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/vector_outer_join3.q.out index 3345247..f0c0113 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -475,37 +475,37 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:c + $hdt$_0:c Fetch Operator limit: -1 $hdt$_2:c Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:c + $hdt$_0:c TableScan alias: c Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cbigint (type: bigint), cstring2 (type: string) + expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col3 (type: string), _col1 (type: bigint) - 1 _col1 (type: string), _col0 (type: bigint) + 0 _col2 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) $hdt$_2:c TableScan alias: c Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cint (type: int), cstring1 (type: string) + expressions: cbigint (type: bigint), cstring2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col2 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col3 (type: string), _col1 (type: bigint) + 1 _col1 (type: string), _col0 (type: bigint) Stage: Stage-3 Map Reduce