diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 0200506..ed5c018 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -784,6 +784,7 @@ public Boolean visitFieldAccess(RexFieldAccess fieldAccess) {
 
   public static Set<Integer> getInputRefs(RexNode expr) {
     InputRefsCollector irefColl = new InputRefsCollector(true);
+    expr.accept(irefColl);
     return irefColl.getInputRefSet();
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 6c0bd25..42e7737 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -248,7 +248,7 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode) {
       // We have valid pruning expressions, only retrieve qualifying partitions
       ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(),
-          HiveCalciteUtil.getInputRefs(pruneNode), this.getRelOptSchema().getTypeFactory()));
+          new HashSet<Integer>(), this.getRelOptSchema().getTypeFactory()));
       partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(),
           partitionCache);
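
Note: the HiveCalciteUtil change above fixes a latent bug rather than adding behavior: the InputRefsCollector was constructed but never applied to the expression, so getInputRefs() always returned an empty set. A minimal sketch of the visitor pattern involved; the collector below is illustrative, not Hive's class (Calcite's RexVisitorImpl, RexInputRef and RexNode are the real APIs):

    import java.util.HashSet;
    import java.util.Set;

    import org.apache.calcite.rex.RexInputRef;
    import org.apache.calcite.rex.RexNode;
    import org.apache.calcite.rex.RexVisitorImpl;

    // Gathers the indices of all input references under an expression.
    class InputRefCollector extends RexVisitorImpl<Void> {
      private final Set<Integer> inputRefs = new HashSet<>();

      InputRefCollector() {
        super(true); // deep = true: recurse into operands of calls
      }

      @Override
      public Void visitInputRef(RexInputRef inputRef) {
        inputRefs.add(inputRef.getIndex());
        return null;
      }

      Set<Integer> collect(RexNode expr) {
        expr.accept(this); // the step the patch adds to getInputRefs()
        return inputRefs;
      }
    }
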
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index 3e2311c..5185535 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -20,6 +20,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.EnumSet;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.Set;
@@ -42,6 +43,7 @@ import org.apache.calcite.sql.SqlKind;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
 
 import com.google.common.collect.ImmutableList;
@@ -88,35 +90,67 @@ public void onMatch(RelOptRuleCall call) {
 
     final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
 
-    final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
+    // 1. Recompose the filter, possibly pulling out common elements from DNF
+    // expressions
+    RexNode topFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
+
+    // 2. We extract possible candidates to be pushed down
+    List<RexNode> operandsToPushDown = new ArrayList<>();
+    List<RexNode> deterministicExprs = new ArrayList<>();
+    List<RexNode> nonDeterministicExprs = new ArrayList<>();
+
+    switch (topFilterCondition.getKind()) {
+    case AND:
+      ImmutableList<RexNode> operands =
+          RexUtil.flattenAnd(((RexCall) topFilterCondition).getOperands());
+      Set<String> nodeDigest = new HashSet<String>();
+      List<RexNode> extractedCommonOperands = null;
+
+      for (RexNode operand : operands) {
+        if (operand.getKind() == SqlKind.OR) {
+          extractedCommonOperands = extractCommonOperands(rexBuilder, operand);
+          for (RexNode extractedExpr : extractedCommonOperands) {
+            if (!nodeDigest.contains(extractedExpr.toString())) {
+              nodeDigest.add(extractedExpr.toString());
+              operandsToPushDown.add(extractedExpr);
+            }
+          }
+        }
-    // 1. We extract possible candidates to be pushed down
-    List<RexNode> commonOperands = new ArrayList<>();
-    switch (condition.getKind()) {
-    case AND:
-      ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) condition).getOperands());
-      for (RexNode operand: operands) {
-        if (operand.getKind() == SqlKind.OR) {
-          commonOperands.addAll(extractCommonOperands(rexBuilder,operand));
+
+        if (HiveCalciteUtil.isDeterministic(operand)) {
+          deterministicExprs.add(operand);
+        } else {
+          nonDeterministicExprs.add(operand);
+        }
+      }
+
+      if (nonDeterministicExprs.size() > 0) {
+        for (RexNode expr : deterministicExprs) {
+          if (!nodeDigest.contains(expr.toString())) {
+            operandsToPushDown.add(expr);
           }
         }
-      break;
-    case OR:
-      commonOperands = extractCommonOperands(rexBuilder,condition);
-      break;
-    default:
-      return;
+
+        topFilterCondition = RexUtil.pullFactors(rexBuilder,
+            RexUtil.composeConjunction(rexBuilder, nonDeterministicExprs, false));
+      }
+      break;
+
+    case OR:
+      operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition);
+      break;
+
+    default:
+      return;
     }
 
     // 3. If we did not generate anything for the new predicate, we bail out
-    if (commonOperands.isEmpty()) {
+    if (operandsToPushDown.isEmpty()) {
       return;
     }
 
     // 4. If the new conjuncts are already present in the plan, we bail out
     final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter);
     final List<RexNode> newConjuncts = new ArrayList<>();
-    for (RexNode commonOperand : commonOperands) {
+    for (RexNode commonOperand : operandsToPushDown) {
       boolean found = false;
       for (RexNode conjunct : predicates.pulledUpPredicates) {
         if (commonOperand.toString().equals(conjunct.toString())) {
@@ -133,12 +167,12 @@ public void onMatch(RelOptRuleCall call) {
     }
 
     // 5. Otherwise, we create a new condition
-    final RexNode newCondition = RexUtil.pullFactors(rexBuilder,
+    final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder,
         RexUtil.composeConjunction(rexBuilder, newConjuncts, false));
 
     // 6. We create the new filter that might be pushed down
-    RelNode newFilter = filterFactory.createFilter(filterChild, newCondition);
-    RelNode newTopFilter = filterFactory.createFilter(newFilter, condition);
+    RelNode newChildFilter = filterFactory.createFilter(filterChild, newChildFilterCondition);
+    RelNode newTopFilter = filterFactory.createFilter(newChildFilter, topFilterCondition);
 
     call.transformTo(newTopFilter);
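
Note: the reworked rule factors operands that appear in every disjunct of an OR out of the condition so they can be evaluated in a cheaper child filter, and additionally keeps non-deterministic conjuncts only in the top filter. A toy model of the common-factor extraction, using strings in place of RexNodes (the class and names below are illustrative only):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class CommonFactorToy {
      // Operands present in every disjunct of a DNF predicate can be pulled
      // out and evaluated once in a filter below the original one.
      static Set<String> commonOperands(List<Set<String>> disjuncts) {
        Set<String> common = new HashSet<>(disjuncts.get(0));
        for (Set<String> disjunct : disjuncts.subList(1, disjuncts.size())) {
          common.retainAll(disjunct);
        }
        return common;
      }

      public static void main(String[] args) {
        // (a AND b) OR (a AND c): "a" is common to both disjuncts, so a
        // child filter "a" can be pushed below the filter that keeps the
        // full disjunction.
        List<Set<String>> dnf = Arrays.asList(
            new HashSet<>(Arrays.asList("a", "b")),
            new HashSet<>(Arrays.asList("a", "c")));
        System.out.println(commonOperands(dnf)); // prints [a]
      }
    }
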
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
index b52779c..16292a1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -17,11 +17,16 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexInputRef;
@@ -31,8 +36,10 @@ import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.type.SqlTypeUtil;
 import org.apache.calcite.util.ImmutableBitSet;
 
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
 
 public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
   private final RelNode childRel;
@@ -81,6 +88,21 @@ public Double visitCall(RexCall call) {
       break;
     }
 
+    case IS_NOT_NULL: {
+      if (childRel instanceof HiveTableScan) {
+        double noOfNulls = getMaxNulls(call, (HiveTableScan) childRel);
+        double totalNoOfTuples = childRel.getRows();
+        if (totalNoOfTuples >= noOfNulls) {
+          selectivity = (totalNoOfTuples - noOfNulls) / totalNoOfTuples;
+        } else {
+          throw new RuntimeException("Invalid stats: number of nulls > number of tuples");
+        }
+      } else {
+        selectivity = computeNotEqualitySelectivity(call);
+      }
+      break;
+    }
+
     case LESS_THAN_OR_EQUAL:
     case GREATER_THAN_OR_EQUAL:
     case LESS_THAN:
@@ -199,6 +221,23 @@ private Double computeConjunctionSelectivity(RexCall call) {
     return selectivity;
   }
 
+  private long getMaxNulls(RexCall call, HiveTableScan t) {
+    long tmpNoNulls = 0;
+    long maxNoNulls = 0;
+
+    Set<Integer> iRefSet = HiveCalciteUtil.getInputRefs(call);
+    List<ColStatistics> colStats = t.getColStat(new ArrayList<Integer>(iRefSet));
+
+    for (ColStatistics cs : colStats) {
+      tmpNoNulls = cs.getNumNulls();
+      if (tmpNoNulls > maxNoNulls) {
+        maxNoNulls = tmpNoNulls;
+      }
+    }
+
+    return maxNoNulls;
+  }
+
   private Double getMaxNDV(RexCall call) {
     double tmpNDV;
     double maxNDV = 1.0;
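
Note: the IS_NOT_NULL branch estimates selectivity as (rows - nulls) / rows, taking the worst case (the maximum null count over all columns referenced by the call, as computed by getMaxNulls). A worked example with hypothetical stats:

    // Hypothetical stats: a scan of 2098 rows whose referenced column
    // has 98 nulls. IS NOT NULL keeps the non-null fraction of the rows.
    public class NotNullSelectivity {
      public static void main(String[] args) {
        double totalNoOfTuples = 2098d;
        double noOfNulls = 98d;
        double selectivity = (totalNoOfTuples - noOfNulls) / totalNoOfTuples;
        System.out.println(selectivity); // ~0.9533: about 2000 rows survive
      }
    }
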
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index fd78824..6737a55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -205,6 +205,13 @@ public static ASTNode buildAST(SqlOperator op, List<ASTNode> children) {
       default:
         node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
       }
+
+      if (hToken.type == HiveParser.TOK_ISNOTNULL) {
+        ASTNode funcNode = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION,
+            "TOK_FUNCTION");
+        funcNode.addChild(node);
+        node = funcNode;
+      }
     } else {
       node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
       if (op.kind != SqlKind.CAST) {
@@ -310,6 +317,7 @@ private static String getName(GenericUDF hiveUDF) {
       registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in"));
       registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between"));
       registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct"));
+      registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL,
+          hToken(HiveParser.TOK_ISNOTNULL, "TOK_ISNOTNULL"));
     }
 
     private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
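
Note: Hive's parser represents `x IS NOT NULL` as a TOK_FUNCTION node whose first child is the TOK_ISNOTNULL token, so the buildAST change wraps the bare token in the same shape the grammar would produce, roughly:

    TOK_FUNCTION
       TOK_ISNOTNULL
       <child expression, e.g. TOK_TABLE_OR_COL x>

Without the wrapping, the translated AST would carry a bare TOK_ISNOTNULL token that downstream semantic analysis does not recognize as a function call.
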
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 6b3e715..f8b3665 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -47,13 +47,18 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
@@ -337,8 +342,21 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
         }
       } else if (udf instanceof GenericUDFOPNot) {
         newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
-      } else {
+      } else if (udf instanceof GenericUDFOPNotNull) {
+        long noOfNulls = getMaxNulls(stats, genFunc);
+        long parentCardinality = stats.getNumRows();
+        long newPredCardinality = parentCardinality;
+        if (parentCardinality > noOfNulls) {
+          newPredCardinality = parentCardinality - noOfNulls;
+        } else if (parentCardinality == noOfNulls) {
+          newPredCardinality = 0;
+        } else {
+          LOG.error("Invalid column stats: number of nulls > cardinality");
+        }
+        return newPredCardinality;
+      } else { // single predicate condition
         newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop, evaluatedRowCount);
       }
@@ -443,6 +461,43 @@ private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred) {
     return numRows / 2;
   }
 
+  private long getMaxNulls(Statistics stats, ExprNodeDesc pred) {
+    long tmpNoNulls = 0;
+    long maxNoNulls = 0;
+
+    if (pred instanceof ExprNodeColumnDesc) {
+      ColStatistics cs = stats.getColumnStatisticsFromColName(
+          ((ExprNodeColumnDesc) pred).getColumn());
+      if (cs != null) {
+        tmpNoNulls = cs.getNumNulls();
+      }
+    } else if (pred instanceof ExprNodeGenericFuncDesc || pred instanceof ExprNodeColumnListDesc) {
+      long noNullsOfChild = 0;
+      for (ExprNodeDesc childExpr : pred.getChildren()) {
+        noNullsOfChild = getMaxNulls(stats, childExpr);
+        if (noNullsOfChild > tmpNoNulls) {
+          tmpNoNulls = noNullsOfChild;
+        }
+      }
+    } else if (pred instanceof ExprNodeConstantDesc) {
+      if (ExprNodeDescUtils.isNullConstant(pred)) {
+        tmpNoNulls = stats.getNumRows();
+      } else {
+        tmpNoNulls = 0;
+      }
+    } else if (pred instanceof ExprNodeDynamicListDesc) {
+      tmpNoNulls = 0;
+    } else if (pred instanceof ExprNodeFieldDesc) {
+      // TODO: Confirm this is safe
+      tmpNoNulls = getMaxNulls(stats, ((ExprNodeFieldDesc) pred).getDesc());
+    }
+
+    if (tmpNoNulls > maxNoNulls) {
+      maxNoNulls = tmpNoNulls;
+    }
+
+    return maxNoNulls;
+  }
+
   private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
       AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop,
       long evaluatedRowCount) throws CloneNotSupportedException {
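
Note on the cardinality arithmetic in the GenericUDFOPNotNull branch: the estimate must be computed as a direct subtraction. A product form such as `parentCardinality * ((parentCardinality - noOfNulls) / parentCardinality)` is algebraically identical, but with long operands the inner quotient truncates to 0 whenever noOfNulls > 0, collapsing the estimate to 0 rows. A demonstration with hypothetical numbers:

    public class NotNullCardinality {
      public static void main(String[] args) {
        // hypothetical stats: 2098 rows, 98 nulls in the filtered column
        long parentCardinality = 2098L;
        long noOfNulls = 98L;
        // (2098 - 98) / 2098 == 0 in long arithmetic, so the product form
        // would estimate 0 rows
        long buggy = parentCardinality * ((parentCardinality - noOfNulls) / parentCardinality);
        long fixed = parentCardinality - noOfNulls;
        System.out.println(buggy); // 0
        System.out.println(fixed); // 2000, the intended estimate
      }
    }
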
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 0a7ce3a..0204bc5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -59,11 +59,13 @@ import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
 import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.rules.FilterAggregateTransposeRule;
+import org.apache.calcite.rel.rules.FilterMergeRule;
 import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
 import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
 import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;
@@ -962,57 +964,77 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
     // TODO: Decorelation of subquery should be done before attempting
     // Partition Pruning; otherwise Expression evaluation may try to execute
     // corelated sub query.
+
+    // 1. Projection Pruning
+    HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
+        HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY,
+        HiveJoin.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY,
+        HiveSortLimit.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY,
+        HiveUnion.UNION_REL_FACTORY);
+    basePlan = fieldTrimmer.trim(basePlan);
 
-    //0. Distinct aggregate rewrite
-    // Run this optimization early, since it is expanding the operator pipeline.
-    if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
-        conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) {
-      // Its not clear, if this rewrite is always performant on MR, since extra map phase
-      // introduced for 2nd MR job may offset gains of this multi-stage aggregation.
+    // 2. Distinct aggregate rewrite
+    // Run this optimization early, since it is expanding the operator
+    // pipeline.
+    String execEngine = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
+    if (!execEngine.equals("mr") && !execEngine.equals("spark")
+        && conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) {
+      // It's not clear if this rewrite is always performant on MR, since
+      // the extra map phase introduced for the 2nd MR job may offset the
+      // gains of this multi-stage aggregation.
       // We need a cost model for MR to enable this on MR.
       basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
     }
 
-    // 1. Push Down Semi Joins
-    basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE,
-        SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
+    // 3. Try factoring out common filter elements & separating deterministic
+    // vs non-deterministic UDF
+    basePlan = hepPlan(basePlan, false, mdProvider, HepMatchOrder.ARBITRARY,
+        HivePreFilteringRule.INSTANCE);
 
-    // 2. Add not null filters
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
-      basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE);
-    }
-
-    // 3. Constant propagation, common filter extraction, and PPD
+    // 4. PPD for old Join Syntax
+    // NOTE: PPD needs to run before adding not null filters
+    // in order to support old style join syntax.
     basePlan = hepPlan(basePlan, true, mdProvider, ReduceExpressionsRule.PROJECT_INSTANCE,
         ReduceExpressionsRule.FILTER_INSTANCE, ReduceExpressionsRule.JOIN_INSTANCE,
-        HivePreFilteringRule.INSTANCE,
+        new HiveFilterProjectTransposeRule(
+            Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class,
+            HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule(
+            HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN,
+        HiveFilterJoinRule.FILTER_ON_JOIN, new FilterAggregateTransposeRule(Filter.class,
+            HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class),
+        new FilterMergeRule(HiveFilter.DEFAULT_FILTER_FACTORY));
+
+    // 5. Do transitive predicate analysis and add not null filters below
+    // joins for NON-NULL Safe Joins.
+    // TODO: Add guard against null-safe join (currently CBO doesn't support
+    // NULL safe joins)
+    basePlan = hepPlan(basePlan, false, mdProvider,
+        new HiveJoinPushTransitivePredicatesRule(
+            Join.class, HiveFilter.DEFAULT_FILTER_FACTORY),
+        HiveJoinAddNotNullRule.INSTANCE);
+
+    // 6. PPD to push new filters introduced by transitive analysis & not null predicates
+    basePlan = hepPlan(basePlan, true, mdProvider,
         new HiveFilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,
-            HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY),
+            HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY),
         new HiveFilterSetOpTransposeRule(HiveFilter.DEFAULT_FILTER_FACTORY),
         HiveFilterJoinRule.JOIN, HiveFilterJoinRule.FILTER_ON_JOIN,
-        new FilterAggregateTransposeRule(Filter.class,
-            HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class));
+        new FilterAggregateTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,
+            Aggregate.class), new FilterMergeRule(HiveFilter.DEFAULT_FILTER_FACTORY));
 
-    // 4. Transitive inference & Partition Pruning
-    basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule(
-        Join.class, HiveFilter.DEFAULT_FILTER_FACTORY),
-        new HivePartitionPruneRule(conf));
+    // 7. Try pushing down Semi Joins now that join keys might be enriched
+    // after transitive inference and PPD.
+    basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE,
+        SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
 
-    // 5. Projection Pruning
-    HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY,
-        HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY,
-        HiveSemiJoin.HIVE_SEMIJOIN_FACTORY, HiveSortLimit.HIVE_SORT_REL_FACTORY,
-        HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
-    basePlan = fieldTrimmer.trim(basePlan);
+    // 8. Merge Filter-Filter, Project-Project if possible
+    basePlan = hepPlan(basePlan, false, mdProvider, new ProjectMergeRule(true,
+        HiveProject.DEFAULT_PROJECT_FACTORY));
 
-    // 6. Rerun PPD through Project as column pruning would have introduced DT
-    // above scans
-    basePlan = hepPlan(basePlan, true, mdProvider,
-        new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,
-            HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY));
+    // 9. Apply Partition Pruning
+    basePlan = hepPlan(basePlan, false, mdProvider, new HivePartitionPruneRule(conf));
 
     return basePlan;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index e291a48..0223038 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -487,6 +487,14 @@ public static boolean isAllConstants(List<ExprNodeDesc> value) {
     return true;
   }
 
+  public static boolean isNullConstant(ExprNodeDesc value) {
+    return (value instanceof ExprNodeConstantDesc)
+        && ((ExprNodeConstantDesc) value).getValue() == null;
+  }
+
   public static PrimitiveTypeInfo deriveMinArgumentCast(
       ExprNodeDesc childExpr, TypeInfo targetType) {
     assert targetType instanceof PrimitiveTypeInfo : "Not a primitive type" + targetType;
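
Note: a quick usage check for the new helper. The ExprNodeConstantDesc constructor and TypeInfoFactory constants are Hive's real APIs; the wrapping harness is illustrative:

    import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class IsNullConstantCheck {
      public static void main(String[] args) {
        // A null literal is an ExprNodeConstantDesc whose value is null;
        // any other descriptor, or a non-null constant, is not.
        ExprNodeDesc nullLiteral = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null);
        ExprNodeDesc intLiteral = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 1);
        System.out.println(ExprNodeDescUtils.isNullConstant(nullLiteral)); // true
        System.out.println(ExprNodeDescUtils.isNullConstant(intLiteral));  // false
      }
    }

The remainder of the patch updates the golden .q.out files affected by the new rule ordering: factored common filter elements, changed alias prefixes, and changed stage numbering.
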
diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out
index fc4f294..a00a196 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out
@@ -120,7 +120,7 @@ STAGE PLANS:
             alias: over1k
             Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53))) (type: boolean)
+              predicate: ((((t = 1) or (t = 2) or (t = 3) or (t = 4) or (t = 5) or (t = 6) or (t = 7) or (t = 9) or (t = 10) or (t = 11) or (t = 12) or (t = 13) or (t = 14) or (t = 15) or (t = 16) or (t = 17) or (t = 27) or (t = 37) or (t = 47) or (t = 52)) and ((si = 2) or (si = 3) or (si = 4) or (si = 5) or (si = 6) or (si = 7) or (si = 8) or (si = 10) or (si = 11) or (si = 12) or (si = 13) or (si = 14) or (si = 15) or (si = 16) or (si = 17) or (si = 18) or (si = 28) or (si = 38) or (si = 48) or (si = 53))) and (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53)))) (type: boolean)
               Statistics: Num rows: 280 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 Statistics: Num rows: 280 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
@@ -209,7 +209,7 @@ STAGE PLANS:
             alias: over1k
             Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53))) (type: boolean)
+              predicate: ((((t = 1) or (t = 2) or (t = 3) or (t = 4) or (t = 5) or (t = 6) or (t = 7) or (t = 9) or (t = 10) or (t = 11) or (t = 12) or (t = 13) or (t = 14) or (t = 15) or (t = 16) or (t = 17) or (t = 27) or (t = 37) or (t = 47) or (t = 52)) and ((si = 2) or (si = 3) or (si = 4) or (si = 5) or (si = 6) or (si = 7) or (si = 8) or (si = 10) or (si = 11) or (si = 12) or (si = 13) or (si = 14) or (si = 15) or (si = 16) or (si = 17) or (si = 18) or (si = 28) or (si = 38) or (si = 48) or (si = 53))) and (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53)))) (type: boolean)
               Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/auto_join11.q.out b/ql/src/test/results/clientpositive/auto_join11.q.out
index 851920b..d98c975 100644
--- a/ql/src/test/results/clientpositive/auto_join11.q.out
+++ b/ql/src/test/results/clientpositive/auto_join11.q.out
@@ -23,11 +23,11 @@ STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:$hdt$_0:src
+        $hdt$_0:$hdt$_0:$hdt$_0:src
          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:$hdt$_0:src
+        $hdt$_0:$hdt$_0:$hdt$_0:src
          TableScan
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/auto_join13.q.out b/ql/src/test/results/clientpositive/auto_join13.q.out
index 952dbf8..a1ec9af 100644
--- a/ql/src/test/results/clientpositive/auto_join13.q.out
+++ b/ql/src/test/results/clientpositive/auto_join13.q.out
@@ -32,7 +32,7 @@ STAGE PLANS:
         $hdt$_0:$hdt$_0:src
           Fetch Operator
             limit: -1
-        $hdt$_0:$hdt$_1:src
+        $hdt$_0:$hdt$_1:$hdt$_1:src
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
@@ -51,7 +51,7 @@ STAGE PLANS:
               keys:
                 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double)
                 1 UDFToDouble(_col0) (type: double)
-        $hdt$_0:$hdt$_1:src
+        $hdt$_0:$hdt$_1:$hdt$_1:src
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
index 3d0067b..3c4b011 100644
--- a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
@@ -675,11 +675,11 @@ STAGE PLANS:
   Stage: Stage-14
     Map Reduce Local Work
      Alias -> Map Local Tables:
-        $hdt$_2:a
+        $hdt$_2:$hdt$_2:a
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
-        $hdt$_2:a
+        $hdt$_2:$hdt$_2:a
          TableScan
            alias: a
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
index d52586f..29058f0 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
@@ -630,10 +630,10 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: subq2:subq1:a
+            alias: b
             Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
+              predicate: ((key < 8) and (key < 6)) (type: boolean)
               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: key (type: int)
@@ -1176,7 +1176,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: subq1:a
+            alias: subq2:a
             Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
               predicate: (key < 6) (type: boolean)
@@ -1188,7 +1188,7 @@ STAGE PLANS:
             Sorted Merge Bucket Map Join Operator
               condition map:
                    Inner Join 0 to 1
-                   Inner Join 0 to 2
+                   Inner Join 1 to 2
               keys:
                 0 key (type: int)
                 1 key (type: int)
@@ -1286,10 +1286,10 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-            alias: a:subq2:subq1:a
+            alias: a:b
             Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
+              predicate: ((key < 8) and (key < 6)) (type: boolean)
               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: key (type: int)
diff --git a/ql/src/test/results/clientpositive/explain_logical.q.out b/ql/src/test/results/clientpositive/explain_logical.q.out
index 8fa0a4c..fc54a0e 100644
--- a/ql/src/test/results/clientpositive/explain_logical.q.out
+++ b/ql/src/test/results/clientpositive/explain_logical.q.out
@@ -512,23 +512,23 @@ $hdt$_0:$hdt$_0:srcpart
       Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
       Select Operator (SEL_2)
         expressions: key (type: string)
-        outputColumnNames: _col0
+        outputColumnNames: _col1
         Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
         Reduce Output Operator (RS_8)
-          key expressions: _col0 (type: string)
+          key expressions: _col1 (type: string)
           sort order: +
-          Map-reduce partition columns: _col0 (type: string)
+          Map-reduce partition columns: _col1 (type: string)
           Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
           Join Operator (JOIN_11)
             condition map:
                  Inner Join 0 to 1
             keys:
-              0 _col0 (type: string)
+              0 _col1 (type: string)
               1 _col0 (type: string)
-            outputColumnNames: _col0, _col2
+            outputColumnNames: _col1, _col3
             Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
             Select Operator (SEL_12)
-              expressions: _col0 (type: string), _col2 (type: string)
+              expressions: _col1 (type: string), _col3 (type: string)
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
               File Output Operator (FS_13)
@@ -559,9 +559,9 @@ $hdt$_1:src2
             condition map:
                  Inner Join 0 to 1
             keys:
-              0 _col0 (type: string)
+              0 _col1 (type: string)
               1 _col0 (type: string)
-            outputColumnNames: _col0, _col2
+            outputColumnNames: _col1, _col3
             Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
 
 PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V4
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index af42d5c..5b970be 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -9,12 +9,12 @@ FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AN
 WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -72,17 +72,10 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
-            Reduce Output Operator
-              key expressions: _col3 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col3 (type: string)
-              Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string)
-          TableScan
             alias: f
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -97,17 +90,24 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col3 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col3 (type: string)
+              Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
                Inner Join 0 to 1
           keys:
-            0 _col3 (type: string)
-            1 _col0 (type: string)
-          outputColumnNames: _col0, _col4
+            0 _col0 (type: string)
+            1 _col3 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: string), _col4 (type: string)
+            expressions: _col1 (type: string), _col0 (type: string)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -135,12 +135,12 @@ FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AN
 WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -198,17 +198,10 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
-            Reduce Output Operator
-              key expressions: _col3 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col3 (type: string)
-              Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string)
-          TableScan
             alias: f
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -223,17 +216,24 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col3 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col3 (type: string)
+              Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
       Reduce Operator Tree:
         Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
-            0 _col3 (type: string)
-            1 _col0 (type: string)
-          outputColumnNames: _col0, _col4
+            0 _col0 (type: string)
+            1 _col3 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: string), _col4 (type: string)
+            expressions: _col1 (type: string), _col0 (type: string)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out
index 9c51ff3..5314722 100644
--- a/ql/src/test/results/clientpositive/flatten_and_or.q.out
+++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out
@@ -44,7 +44,7 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              predicate: ((((key = '0') or (key = '1') or (key = '2') or (key = '3') or (key = '4') or (key = '5') or (key = '6') or (key = '7') or (key = '8') or (key = '9') or (key = '10')) and ((value = '8') or (value = '5') or (value = '6') or (value = '1') or (value = '3'))) and (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3')))) (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
diff --git a/ql/src/test/results/clientpositive/fouter_join_ppr.q.out b/ql/src/test/results/clientpositive/fouter_join_ppr.q.out
index 087edf2..11e7b3d 100644
--- a/ql/src/test/results/clientpositive/fouter_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/fouter_join_ppr.q.out
@@ -1232,7 +1232,7 @@ STAGE PLANS:
               name: default.srcpart
             name: default.srcpart
       Truncated Path -> Alias:
-        /src [$hdt$_0:a]
+        /src [$hdt$_0:$hdt$_0:a]
         /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b]
         /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b]
       Needs Tagging: true
@@ -1605,7 +1605,7 @@ STAGE PLANS:
               name: default.srcpart
             name: default.srcpart
       Truncated Path -> Alias:
-        /src [$hdt$_1:b]
+        /src [$hdt$_1:$hdt$_1:b]
         /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:a]
         /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:a]
       Needs Tagging: true
diff --git a/ql/src/test/results/clientpositive/input42.q.out b/ql/src/test/results/clientpositive/input42.q.out
index 2974159..eda4a52 100644
--- a/ql/src/test/results/clientpositive/input42.q.out
+++ b/ql/src/test/results/clientpositive/input42.q.out
@@ -1819,32 +1819,128 @@ STAGE PLANS:
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.srcpart
             name: default.srcpart
+          Partition
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+              hr 11
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+          Partition
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+              hr 12
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
       Processor Tree:
         TableScan
           alias: a
-          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
           GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (rand(100) < 0.1) (type: boolean)
-            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3
+          Select Operator
+            expressions: key (type: string), value (type: string), ds (type: string), hr (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((_col2 = '2008-04-08') and (rand(100) < 0.1)) (type: boolean)
               Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-              ListSink
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                ListSink
 
 PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 113	val_113	2008-04-08	11
 118	val_118	2008-04-08	11
diff --git a/ql/src/test/results/clientpositive/join34.q.out b/ql/src/test/results/clientpositive/join34.q.out
index ffdf5a5..bf754d3 100644
--- a/ql/src/test/results/clientpositive/join34.q.out
+++ b/ql/src/test/results/clientpositive/join34.q.out
@@ -148,11 +148,11 @@ STAGE PLANS:
   Stage: Stage-6
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_1:x
+        $hdt$_1:$hdt$_1:x
          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_1:x
+        $hdt$_1:$hdt$_1:x
          TableScan
            alias: x
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/join35.q.out b/ql/src/test/results/clientpositive/join35.q.out
index 5b68295..b947df1 100644
--- a/ql/src/test/results/clientpositive/join35.q.out
+++ b/ql/src/test/results/clientpositive/join35.q.out
@@ -262,11 +262,11 @@ STAGE PLANS:
   Stage: Stage-7
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_1:x
+        $hdt$_1:$hdt$_1:x
          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_1:x
+        $hdt$_1:$hdt$_1:x
          TableScan
            alias: x
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
index 7f32108..bebb49e 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
@@ -413,14 +413,98 @@ TOK_QUERY
 
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Partition Description:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: fact_daily
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((((key = '484') or (key = '238')) and ((value = 'val_484') or (value = 'val_238'))) and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: value=val_238
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 1
+              hr 4
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.fact_daily
+              numFiles 3
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct fact_daily { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.fact_daily
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct fact_daily { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.fact_daily
+            name: default.fact_daily
+#### A masked pattern was here ####
           Partition
+            base file name: value=val_484
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             partition values:
@@ -464,20 +548,15 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.fact_daily
             name: default.fact_daily
+      Truncated Path -> Alias:
+        /fact_daily/ds=1/hr=4/key=238/value=val_238 [$hdt$_0:fact_daily]
+        /fact_daily/ds=1/hr=4/key=484/value=val_484 [$hdt$_0:fact_daily]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
       Processor Tree:
-        TableScan
-          alias: fact_daily
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238'))) (type: boolean)
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- List Bucketing Query
 SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484') or (key='238' and value= 'val_238'))
diff --git a/ql/src/test/results/clientpositive/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/louter_join_ppr.q.out
index 1b2b8e3..2904aca 100644
--- a/ql/src/test/results/clientpositive/louter_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/louter_join_ppr.q.out
@@ -757,7 +757,7 @@ STAGE PLANS:
               name: default.srcpart
             name: default.srcpart
       Truncated Path -> Alias:
-        /src [$hdt$_1:b]
+        /src [$hdt$_1:$hdt$_1:b]
         /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:a]
         /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:a]
         /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:a]
@@ -1518,7 +1518,7 @@ STAGE PLANS:
               name: default.srcpart
             name: default.srcpart
       Truncated Path -> Alias:
-        /src [$hdt$_1:b]
+        /src [$hdt$_1:$hdt$_1:b]
         /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:a]
         /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:a]
       Needs Tagging: true
diff --git a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
index e92f307..d7a146f 100644
--- a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
@@ -466,14 +466,14 @@ STAGE PLANS:
   Stage: Stage-7
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:src
+        $hdt$_0:$hdt$_0:src
           Fetch Operator
             limit: -1
         $hdt$_2:src1
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:src
+        $hdt$_0:$hdt$_0:src
          TableScan
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/nonmr_fetch.q.out b/ql/src/test/results/clientpositive/nonmr_fetch.q.out
index 9652d7e..0a259e0 100644
--- a/ql/src/test/results/clientpositive/nonmr_fetch.q.out
+++ b/ql/src/test/results/clientpositive/nonmr_fetch.q.out
@@ -933,25 +933,33 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: srcpart
-          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-          Filter Operator
-            predicate: (rand() > 1.0) (type: boolean)
-            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint)
-              outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string), ds (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((_col2 = '2008-04-09') and (rand() > 1.0)) (type: boolean)
               Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-              ListSink
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                ListSink
 
 PREHOOK: query: select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out b/ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out
index bfe983a..ff1e343 100644
--- a/ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out
@@ -762,7 +762,7 @@ STAGE PLANS:
               name: default.srcpart
             name: default.srcpart
       Truncated Path -> Alias:
-        /src [$hdt$_0:a]
+        /src [$hdt$_0:$hdt$_0:a]
         /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b]
         /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b]
       Needs Tagging: true
diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
index d7c40a3..9fdbecf 100644
--- a/ql/src/test/results/clientpositive/pcr.q.out
+++ b/ql/src/test/results/clientpositive/pcr.q.out
@@ -976,8 +976,8 @@ STAGE PLANS:
               name: default.pcr_t1
             name: default.pcr_t1
       Truncated Path -> Alias:
-        /pcr_t1/ds=2000-04-08 [pcr_t1]
-        /pcr_t1/ds=2000-04-10 [pcr_t1]
+        /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1]
+        /pcr_t1/ds=2000-04-10 [$hdt$_0:pcr_t1]
       Needs Tagging: false
      Reduce Operator Tree:
        Select Operator
@@ -1258,9 +1258,9 @@ STAGE PLANS:
               name: default.pcr_t1
             name: default.pcr_t1
       Truncated Path -> Alias:
-        /pcr_t1/ds=2000-04-08 [pcr_t1]
-        /pcr_t1/ds=2000-04-09 [pcr_t1]
-        /pcr_t1/ds=2000-04-10 [pcr_t1]
+        /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1]
+        /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1]
+        /pcr_t1/ds=2000-04-10 [$hdt$_0:pcr_t1]
       Needs Tagging: false
       Reduce Operator Tree:
         Select Operator
@@ -2475,7 +2475,7 @@ STAGE PLANS:
           GatherStats: false
           Filter Operator
             isSamplingPred: false
-            predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+            predicate: (((key = 1) or (key = 2)) and (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2)))) (type: boolean)
             Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds (type: string)
@@ -2581,8 +2581,8 @@ STAGE PLANS:
               name: default.pcr_t1
             name: default.pcr_t1
       Truncated Path -> Alias:
-        /pcr_t1/ds=2000-04-08 [pcr_t1]
-        /pcr_t1/ds=2000-04-09 [pcr_t1]
+        /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1]
+        /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1]
       Needs Tagging: false
       Reduce Operator Tree:
         Select Operator
diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
index 7e19be4..acf1416 100644
--- a/ql/src/test/results/clientpositive/pointlookup.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup.q.out
@@ -44,7 +44,7 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              predicate: ((((key = '0') or (key = '1') or (key = '2') or (key = '3') or (key = '4') or (key = '5') or (key = '6') or (key = '7') or (key = '8') or (key = '9') or (key = '10')) and ((value = '8') or (value = '5') or (value = '6') or (value = '1') or (value = '3'))) and (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3')))) (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
or (value = '3'))) and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -176,7 +176,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) + predicate: (((((key = '0') or (key = '1') or (key = '2') or (key = '3') or (key = '4') or (key = '5') or (key = '6') or (key = '7') or (key = '8') or (key = '9') or (key = '10')) and ((value = '8') or (value = '5') or (value = '6') or (value = '1') or (value = '3'))) and (value) IN ('1', '3', '5', '6', '8')) and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index 55edd90..648cd07 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -165,7 +165,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + predicate: (((key = 1) or (key = 2)) and (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) @@ -271,8 +271,8 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [pcr_t1] - /pcr_t1/ds=2000-04-09 [pcr_t1] + /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1] + /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out index 4cfb97e..8ce596c 100644 --- a/ql/src/test/results/clientpositive/pointlookup3.q.out +++ b/ql/src/test/results/clientpositive/pointlookup3.q.out @@ -125,7 +125,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + predicate: (((key = 1) or (key = 2)) and (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds1 (type: 
string), ds2 (type: string) @@ -233,8 +233,8 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1] - /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [pcr_t1] + /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [$hdt$_0:pcr_t1] + /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [$hdt$_0:pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -364,7 +364,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key = 1) (type: boolean) + predicate: (((key = 1) or (key = 2)) and (key = 1)) (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string), ds1 (type: string) @@ -426,7 +426,7 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1] + /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [$hdt$_0:pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/ppd_gby_join.q.out index e3f71e7..8b7e06a 100644 --- a/ql/src/test/results/clientpositive/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_gby_join.q.out @@ -90,10 +90,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + predicate: ((_col2 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -360,10 +360,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col2 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + predicate: ((_col2 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join.q.out b/ql/src/test/results/clientpositive/ppd_join.q.out index 58c4e43..0a2176b 100644 --- a/ql/src/test/results/clientpositive/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_join.q.out @@ -88,13 +88,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + predicate: ((_col2 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string) + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -613,13 +613,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - 
predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + predicate: ((_col2 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string) + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_join2.q.out b/ql/src/test/results/clientpositive/ppd_join2.q.out index e99839e..aaa0289 100644 --- a/ql/src/test/results/clientpositive/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/ppd_join2.q.out @@ -129,15 +129,15 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator @@ -151,12 +151,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col1 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col2, _col5 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col4 (type: string) + expressions: _col2 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1816,12 +1816,12 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator @@ -1835,12 +1835,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col1 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col2, _col5 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col4 (type: string) + expressions: _col2 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out index f2b0b60..7384e0f 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ 
b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -95,14 +95,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) + predicate: ((_col2 > '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) @@ -150,7 +150,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col3 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -158,10 +158,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col3 + outputColumnNames: _col1, _col4 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1850,14 +1850,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) + predicate: ((_col2 > '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1890,7 +1890,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col3 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1898,10 +1898,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col3 + outputColumnNames: _col1, _col4 Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/ppd_join_filter.q.out index 1781fe9..5fa3749 100644 --- a/ql/src/test/results/clientpositive/ppd_join_filter.q.out +++ 
b/ql/src/test/results/clientpositive/ppd_join_filter.q.out @@ -213,7 +213,7 @@ STAGE PLANS: Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false @@ -228,7 +228,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -271,7 +271,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 21 Data size: 222 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: double), _col2 (type: double) + value expressions: _col2 (type: double), _col3 (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -282,7 +282,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -291,7 +291,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -351,10 +351,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col3, _col4 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) + expressions: _col0 (type: string), _col3 (type: double), _col4 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -634,7 +634,7 @@ STAGE PLANS: Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -645,7 +645,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -684,7 +684,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 
(type: double), _col2 (type: double) + value expressions: _col2 (type: double), _col3 (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -695,7 +695,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -704,7 +704,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -764,10 +764,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col3, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) + expressions: _col0 (type: string), _col3 (type: double), _col4 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1047,7 +1047,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false @@ -1062,7 +1062,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -1101,7 +1101,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 42 Data size: 445 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: double), _col2 (type: double) + value expressions: _col2 (type: double), _col3 (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1112,7 +1112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -1121,7 +1121,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -1181,10 +1181,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, 
_col3, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) + expressions: _col0 (type: string), _col3 (type: double), _col4 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1464,7 +1464,7 @@ STAGE PLANS: Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1475,7 +1475,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -1514,7 +1514,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: double), _col2 (type: double) + value expressions: _col2 (type: double), _col3 (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1525,7 +1525,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -1534,7 +1534,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2 + columns _col0,_col2,_col3 columns.types string,double,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -1594,10 +1594,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col3, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) + expressions: _col0 (type: string), _col3 (type: double), _col4 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_outer_join1.q.out b/ql/src/test/results/clientpositive/ppd_outer_join1.q.out index d021d5d..00a5871 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join1.q.out @@ -54,12 +54,15 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 
55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Filter Operator + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/ppd_random.q.out b/ql/src/test/results/clientpositive/ppd_random.q.out index ff9a812..99bd994 100644 --- a/ql/src/test/results/clientpositive/ppd_random.q.out +++ b/ql/src/test/results/clientpositive/ppd_random.q.out @@ -35,13 +35,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 > '2') (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner1.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner1.q.out index 90f17a7..4d30822 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner1.q.out +++ b/ql/src/test/results/clientpositive/rand_partitionpruner1.q.out @@ -38,13 +38,13 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (rand(1) < 0.1) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (rand(1) < 0.1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -115,7 +115,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] + /src [$hdt$_0:src] Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index d04ad8b..b89e076 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ 
b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -67,42 +67,46 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (rand(1) < 0.1) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((rand(1) < 0.1) and (_col2 = '2008-04-08')) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,hr,ds - columns.comments - columns.types string:string:string:string -#### A masked pattern was here #### - name default.tmptable - serialization.ddl struct tmptable { string key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,hr,ds + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.tmptable + serialization.ddl struct tmptable { string key, string value, string hr, string ds} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -198,9 +202,103 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a] - /srcpart/ds=2008-04-08/hr=12 [a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:a] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:a] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:a] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:a] Stage: Stage-7 Conditional Operator @@ -387,6 +485,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@tmptable POSTHOOK: query: 
insert overwrite table tmptable select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' @@ -394,6 +494,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@tmptable POSTHOOK: Lineage: tmptable.ds SIMPLE [(srcpart)a.FieldSchema(name:hr, type:string, comment:null), ] POSTHOOK: Lineage: tmptable.hr SIMPLE [(srcpart)a.FieldSchema(name:ds, type:string, comment:null), ] diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out index 634e171..3ba6fd2 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out +++ b/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out @@ -69,6 +69,138 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl 
struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 hr 12 properties: COLUMN_STATS_ACCURATE true @@ -111,33 +243,41 @@ STAGE PLANS: Processor Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((rand(1) < 0.1) and ((UDFToDouble(key) <= 50.0) and (UDFToDouble(key) >= 10.0))) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((rand(1) < 0.1) and (_col2 = '2008-04-08') and ((UDFToDouble(_col0) <= 50.0) and (UDFToDouble(_col0) >= 10.0)) and (_col3 like '%2')) (type: boolean) Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - ListSink + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and 
not(key > 50 or key < 10) and a.hr like '%2' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -42 val_42 2008-04-08 12 -44 val_44 2008-04-08 12 -26 val_26 2008-04-08 12 +47 val_47 2008-04-08 12 +35 val_35 2008-04-08 12 18 val_18 2008-04-08 12 -37 val_37 2008-04-08 12 PREHOOK: query: -- without rand for comparison explain extended select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/router_join_ppr.q.out b/ql/src/test/results/clientpositive/router_join_ppr.q.out index b7d593f..3c6b5ba 100644 --- a/ql/src/test/results/clientpositive/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/router_join_ppr.q.out @@ -384,7 +384,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:a] + /src [$hdt$_0:$hdt$_0:a] /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b] @@ -1145,7 +1145,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:a] + /src [$hdt$_0:$hdt$_0:a] /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] Needs Tagging: true diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out index 99223b9..bbcbc26 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -552,7 +552,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (GROUP, 2) Reducer 6 <- Map 5 (GROUP, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (SORT, 1) @@ -567,17 +567,17 @@ STAGE PLANS: predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string) - sort order: +++ + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Map 5 @@ -601,25 +601,16 @@ STAGE PLANS: Reducer 2 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index fb08f10..44ed533 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -2534,7 +2534,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) + predicate: (((key = 1) or (key = 2)) and (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2)))) (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index 306292a..925c257 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -98,10 +98,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + predicate: ((_col2 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -366,10 +366,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col2 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + predicate: ((_col2 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: 
COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_join.q.out index aee7630..6715be1 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join.q.out @@ -96,13 +96,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + predicate: ((_col2 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string) + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -629,13 +629,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + predicate: ((_col2 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string) + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out index b4c7f23..0b5c447 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -55,15 +55,15 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: @@ -125,12 +125,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col1 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col2, _col5 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col4 (type: string) + expressions: _col2 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1763,12 +1763,12 @@ STAGE PLANS: Statistics: 
             Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: value (type: string)
-            outputColumnNames: _col0
+            outputColumnNames: _col1
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
             Reduce Output Operator
-              key expressions: _col0 (type: string)
+              key expressions: _col1 (type: string)
               sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              Map-reduce partition columns: _col1 (type: string)
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
         Map 3
             Map Operator Tree:
@@ -1812,12 +1812,12 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: string)
+                  0 _col1 (type: string)
                   1 _col1 (type: string)
-                outputColumnNames: _col1, _col4
+                outputColumnNames: _col2, _col5
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: string), _col4 (type: string)
+                  expressions: _col2 (type: string), _col5 (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out
index c9f6762..ef962db 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out
@@ -129,10 +129,10 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col1, _col3
+                outputColumnNames: _col1, _col4
                 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: string), _col3 (type: string)
+                  expressions: _col1 (type: string), _col4 (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -150,14 +150,14 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2
+                outputColumnNames: _col0, _col2, _col3
                 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean)
+                  predicate: ((_col2 > '10') or (_col0 <> '10')) (type: boolean)
                   Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: _col0 (type: string), _col2 (type: string)
-                    outputColumnNames: _col0, _col2
+                    expressions: _col0 (type: string), _col3 (type: string)
+                    outputColumnNames: _col0, _col3
                     Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
                     Filter Operator
                       predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean)
@@ -170,7 +170,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col2 (type: string)
+                        value expressions: _col3 (type: string)

  Stage: Stage-0
    Fetch Operator
@@ -1875,10 +1875,10 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col1, _col3
+                outputColumnNames: _col1, _col4
                 Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: string), _col3 (type: string)
+                  expressions: _col1 (type: string), _col4 (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -1896,21 +1896,21 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2
+                outputColumnNames: _col0, _col2, _col3
                 Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean)
+                  predicate: ((_col2 > '10') or (_col0 <> '10')) (type: boolean)
                   Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: _col0 (type: string), _col2 (type: string)
-                    outputColumnNames: _col0, _col2
+                    expressions: _col0 (type: string), _col3 (type: string)
+                    outputColumnNames: _col0, _col3
                     Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col2 (type: string)
+                      value expressions: _col3 (type: string)

  Stage: Stage-0
    Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out
index 96defa6..8c1f4e9 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out
@@ -287,10 +287,10 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col2, _col3
+                outputColumnNames: _col0, _col3, _col4
                 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
+                  expressions: _col0 (type: string), _col3 (type: double), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -329,7 +329,7 @@ STAGE PLANS:
                     Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double)
-                      outputColumnNames: _col0, _col1, _col2
+                      outputColumnNames: _col0, _col2, _col3
                       Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
                       Filter Operator
                         isSamplingPred: false
@@ -341,7 +341,7 @@
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 21 Data size: 222 Basic stats: COMPLETE Column stats: NONE
                           tag: 1
-                          value expressions: _col1 (type: double), _col2 (type: double)
+                          value expressions: _col2 (type: double), _col3 (type: double)
                           auto parallelism: false

  Stage: Stage-0
@@ -669,10 +669,10 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col2, _col3
+                outputColumnNames: _col0, _col3, _col4
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
+                  expressions: _col0 (type: string), _col3 (type: double), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -711,7 +711,7 @@ STAGE PLANS:
                     Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double)
-                      outputColumnNames: _col0, _col1, _col2
+                      outputColumnNames: _col0, _col2, _col3
                       Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
@@ -719,7 +719,7 @@
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
-                        value expressions: _col1 (type: double), _col2 (type: double)
+                        value expressions: _col2 (type: double), _col3 (type: double)
                        auto parallelism: false

  Stage: Stage-0
@@ -1047,10 +1047,10 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col2, _col3
+                outputColumnNames: _col0, _col3, _col4
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
+                  expressions: _col0 (type: string), _col3 (type: double), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -1089,7 +1089,7 @@ STAGE PLANS:
                     Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double)
-                      outputColumnNames: _col0, _col1, _col2
+                      outputColumnNames: _col0, _col2, _col3
                       Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                       Filter Operator
                         isSamplingPred: false
@@ -1101,7 +1101,7 @@
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 42 Data size: 445 Basic stats: COMPLETE Column stats: NONE
                           tag: 1
-                          value expressions: _col1 (type: double), _col2 (type: double)
+                          value expressions: _col2 (type: double), _col3 (type: double)
                           auto parallelism: false

  Stage: Stage-0
@@ -1429,10 +1429,10 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col2, _col3
+                outputColumnNames: _col0, _col3, _col4
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
+                  expressions: _col0 (type: string), _col3 (type: double), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -1471,7 +1471,7 @@ STAGE PLANS:
                     Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double)
-                      outputColumnNames: _col0, _col1, _col2
+                      outputColumnNames: _col0, _col2, _col3
                       Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
@@ -1479,7 +1479,7 @@
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
-                        value expressions: _col1 (type: double), _col2 (type: double)
+                        value expressions: _col2 (type: double), _col3 (type: double)
                        auto parallelism: false

  Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
index 987c653..4a3c7be 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
@@ -61,12 +61,15 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: string)
+                    Filter Operator
+                      predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: string)
         Reducer 2
             Reduce Operator Tree:
              Join Operator
diff --git a/ql/src/test/results/clientpositive/subquery_notexists.q.out b/ql/src/test/results/clientpositive/subquery_notexists.q.out
index 215d855..70f1677 100644
--- a/ql/src/test/results/clientpositive/subquery_notexists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notexists.q.out
@@ -275,7 +275,7 @@ STAGE PLANS:
              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: _col1 (type: string)
-               outputColumnNames: _col1
+               outputColumnNames: _col0
                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -302,9 +302,9 @@ STAGE PLANS:
              value expressions: _col0 (type: string)
          TableScan
            Reduce Output Operator
-             key expressions: _col1 (type: string)
+             key expressions: _col0 (type: string)
              sort order: +
-             Map-reduce partition columns: _col1 (type: string)
+             Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Join Operator
@@ -312,11 +312,11 @@ STAGE PLANS:
            Left Outer Join0 to 1
          keys:
            0 _col1 (type: string)
-           1 _col1 (type: string)
-         outputColumnNames: _col0, _col1, _col3
+           1 _col0 (type: string)
+         outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-           predicate: _col3 is null (type: boolean)
+           predicate: _col2 is null (type: boolean)
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: string), _col1 (type: string)
diff --git a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out
index 637fc62..65f0a32 100644
--- a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out
@@ -223,9 +223,9 @@ STAGE PLANS:
              value expressions: _col0 (type: string)
          TableScan
            Reduce Output Operator
-             key expressions: _col1 (type: string)
+             key expressions: _col0 (type: string)
              sort order: +
-             Map-reduce partition columns: _col1 (type: string)
+             Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Join Operator
@@ -233,11 +233,11 @@ STAGE PLANS:
            Left Outer Join0 to 1
          keys:
            0 _col1 (type: string)
-           1 _col1 (type: string)
-         outputColumnNames: _col0, _col1, _col3
+           1 _col0 (type: string)
+         outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-           predicate: _col3 is null (type: boolean)
+           predicate: _col2 is null (type: boolean)
            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: string), _col1 (type: string)
@@ -278,7 +278,7 @@ STAGE PLANS:
              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: _col1 (type: string)
-               outputColumnNames: _col1
+               outputColumnNames: _col0
                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
diff --git a/ql/src/test/results/clientpositive/subquery_notin.q.out b/ql/src/test/results/clientpositive/subquery_notin.q.out
index 5563794..300b282 100644
--- a/ql/src/test/results/clientpositive/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notin.q.out
@@ -1546,17 +1546,20 @@ STAGE PLANS:
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: ((key < '11') and (CASE WHEN ((key > '104')) THEN (null) ELSE (key) END < '11')) (type: boolean)
-             Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+             predicate: (key < '11') (type: boolean)
+             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string)
                outputColumnNames: _col0
-               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-               Reduce Output Operator
-                 key expressions: _col0 (type: string)
-                 sort order: +
-                 Map-reduce partition columns: _col0 (type: string)
+               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+               Filter Operator
+                 predicate: (_col0 < '11') (type: boolean)
                  Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: string)
+                   Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Join Operator
          condition map:
diff --git a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
index 1562087..11df227 100644
--- a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
+++ b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
@@ -94,13 +94,17 @@ STAGE PLANS:
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Union
                  Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                 File Output Operator
-                   compressed: false
+                 Select Operator
+                   expressions: _col0 (type: int)
+                   outputColumnNames: _col0
                    Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                   table:
-                       input format: org.apache.hadoop.mapred.TextInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                   File Output Operator
+                     compressed: false
+                     Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                     table:
+                         input format: org.apache.hadoop.mapred.TextInputFormat
+                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          TableScan
            alias: union_all_bug_test_2
            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -113,13 +117,17 @@ STAGE PLANS:
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Union
                  Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                 File Output Operator
-                   compressed: false
+                 Select Operator
+                   expressions: _col0 (type: int)
+                   outputColumnNames: _col0
                    Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                   table:
-                       input format: org.apache.hadoop.mapred.TextInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                   File Output Operator
+                     compressed: false
+                     Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                     table:
+                         input format: org.apache.hadoop.mapred.TextInputFormat
+                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
@@ -527,8 +535,8 @@ STAGE PLANS:
            predicate: ((if(true, f1, f2) = 1) and (f1 = 1)) (type: boolean)
            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
            Select Operator
-             expressions: 1 (type: int)
-             outputColumnNames: _col0
+             expressions: 1 (type: int), if(true, 1, f2) (type: int)
+             outputColumnNames: _col0, _col1
              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
              Union
                Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: COMPLETE
@@ -550,8 +558,8 @@ STAGE PLANS:
            predicate: false (type: boolean)
            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
            Select Operator
-             expressions: 1 (type: int)
-             outputColumnNames: _col0
+             expressions: 1 (type: int), 0 (type: int)
+             outputColumnNames: _col0, _col1
              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
              Union
                Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/vectorization_7.q.out b/ql/src/test/results/clientpositive/vectorization_7.q.out
index 6e2a0ea..13fde8e 100644
--- a/ql/src/test/results/clientpositive/vectorization_7.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_7.q.out
@@ -68,22 +68,22 @@ STAGE PLANS:
            alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss'))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean)
-             Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+             predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss'))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean)
+             Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-               Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
                  sort order: +++++++++++++++
-                 Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE
      Execution mode: vectorized
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-         Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+         Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE
          Limit
            Number of rows: 25
            Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE
@@ -250,22 +250,22 @@ STAGE PLANS:
            alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss'))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean)
-             Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+             predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss'))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean)
+             Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-               Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
                  sort order: +++++++++++++++
-                 Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE
      Execution mode: vectorized
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-         Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+         Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE
          Limit
            Number of rows: 25
            Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE