diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml
index ca5028d..9fe829d 100644
--- a/druid-handler/pom.xml
+++ b/druid-handler/pom.xml
@@ -212,6 +212,17 @@
       <groupId>org.apache.calcite</groupId>
       <artifactId>calcite-druid</artifactId>
       <version>${calcite.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.calcite.avatica</groupId>
+          <artifactId>avatica-core</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.calcite.avatica</groupId>
+      <artifactId>avatica</artifactId>
+      <version>${avatica.version}</version>
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
index 0b35428..be374af 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
@@ -170,7 +170,11 @@ private static String createSelectStarQuery(String dataSource) throws IOExceptio
     // Create Select query
     SelectQueryBuilder builder = new Druids.SelectQueryBuilder();
     builder.dataSource(dataSource);
-    builder.intervals(Arrays.asList(DruidTable.DEFAULT_INTERVAL));
+    final List<Interval> intervals = Arrays.asList(
+        new Interval(DruidTable.DEFAULT_INTERVAL.getStartMillis(),
+            DruidTable.DEFAULT_INTERVAL.getEndMillis(),
+            ISOChronology.getInstanceUTC()));
+    builder.intervals(intervals);
     builder.pagingSpec(PagingSpec.newSpec(1));
     Map<String, Object> context = new HashMap<>();
     context.put(Constants.DRUID_QUERY_FETCH, false);
@@ -413,11 +417,15 @@ private static String createSelectStarQuery(String dataSource) throws IOExceptio
   private static List<List<Interval>> createSplitsIntervals(List<Interval> intervals, int numSplits
   ) {
-    final long totalTime = DruidDateTimeUtils.extractTotalTime(intervals);
+    long startTime = intervals.get(0).getStartMillis();
     long endTime = startTime;
     long currTime = 0;
     List<List<Interval>> newIntervals = new ArrayList<>();
+    long totalTime = 0;
+    for (Interval interval : intervals) {
+      totalTime += interval.getEndMillis() - interval.getStartMillis();
+    }
     for (int i = 0, posIntervals = 0; i < numSplits; i++) {
       final long rangeSize = Math.round((double) (totalTime * (i + 1)) / numSplits)
           - Math.round((double) (totalTime * i) / numSplits);
diff --git a/pom.xml b/pom.xml
index 3ea3c77..5d1b620 100644
--- a/pom.xml
+++ b/pom.xml
@@ -113,10 +113,10 @@
     <antlr.version>3.5.2</antlr.version>
     <apache-directory-server.version>1.5.6</apache-directory-server.version>
     <apache-directory-clientapi.version>0.1</apache-directory-clientapi.version>
-    <avatica.version>1.8.0</avatica.version>
+    <avatica.version>1.10.0-SNAPSHOT</avatica.version>
     <avro.version>1.7.7</avro.version>
     <bonecp.version>0.8.0.RELEASE</bonecp.version>
-    <calcite.version>1.10.0</calcite.version>
+    <calcite.version>1.12.0-SNAPSHOT</calcite.version>
     <datanucleus-api-jdo.version>4.2.4</datanucleus-api-jdo.version>
     <datanucleus-core.version>4.1.17</datanucleus-core.version>
     <datanucleus-rdbms.version>4.1.19</datanucleus-rdbms.version>
diff --git a/ql/pom.xml b/ql/pom.xml
index 7db0ede..c541538 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -379,12 +379,22 @@
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-core</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.calcite.avatica</groupId>
+      <artifactId>avatica-core</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.calcite</groupId>
       <artifactId>calcite-druid</artifactId>
       <version>${calcite.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.calcite.avatica</groupId>
+          <artifactId>avatica-core</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.calcite.avatica</groupId>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 1d78b4c..b121eea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -34,6 +34,7 @@
 import org.apache.calcite.adapter.druid.DruidQuery;
 import org.apache.calcite.adapter.druid.DruidSchema;
 import org.apache.calcite.adapter.druid.DruidTable;
+import org.apache.calcite.adapter.druid.LocalInterval;
 import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptMaterialization;
@@ -310,7 +311,7 @@ private static RelNode createTableScan(Table viewTable) {
       }
       metrics.add(field.getName());
     }
-    List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
+    List<LocalInterval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
     DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
         dataSource, RelDataTypeImpl.proto(rowType), metrics,
         DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 6ccd879..e339d0a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -48,6 +48,7 @@
 import org.apache.calcite.rex.RexLocalRef;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexPatternFieldRef;
 import org.apache.calcite.rex.RexRangeRef;
 import org.apache.calcite.rex.RexSubQuery;
 import org.apache.calcite.rex.RexUtil;
@@ -1074,6 +1075,11 @@ public Boolean visitSubQuery(RexSubQuery subQuery) {
       // it seems that it is not used by anything.
       return false;
     }
+
+    @Override
+    public Boolean visitPatternFieldRef(RexPatternFieldRef fieldRef) {
+      return false;
+    }
   }
 
   public static Set<Integer> getInputRefs(RexNode expr) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
index 9a65de3..d0b1757 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
+import org.apache.calcite.config.CalciteConnectionConfig;
 import org.apache.calcite.plan.Context;
 import org.apache.calcite.rel.RelNode;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
@@ -27,11 +28,14 @@ public class HivePlannerContext implements Context {
   private HiveAlgorithmsConf algoConfig;
   private HiveRulesRegistry registry;
+  private CalciteConnectionConfig calciteConfig;
   private Set<RelNode> corrScalarRexSQWithAgg;
 
-  public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry, Set<RelNode> corrScalarRexSQWithAgg) {
+  public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry,
+      CalciteConnectionConfig calciteConfig, Set<RelNode> corrScalarRexSQWithAgg) {
     this.algoConfig = algoConfig;
     this.registry = registry;
+    this.calciteConfig = calciteConfig;
     // this is to keep track if a subquery is correlated and contains aggregate
     // this is computed in CalcitePlanner while planning and is later required by subquery remove rule
     // hence this is passed using HivePlannerContext
@@ -45,6 +49,9 @@ public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry regis
     if (clazz.isInstance(registry)) {
       return clazz.cast(registry);
     }
+    if (clazz.isInstance(calciteConfig)) {
+      return clazz.cast(calciteConfig);
+    }
     if(clazz.isInstance(corrScalarRexSQWithAgg)) {
       return clazz.cast(corrScalarRexSQWithAgg);
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
index 2aadf50..8e52d88 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
@@ -27,6 +27,7 @@
 import org.apache.calcite.rel.logical.LogicalFilter;
 import org.apache.calcite.rel.logical.LogicalIntersect;
 import org.apache.calcite.rel.logical.LogicalJoin;
+import org.apache.calcite.rel.logical.LogicalMatch;
 import org.apache.calcite.rel.logical.LogicalMinus;
 import org.apache.calcite.rel.logical.LogicalProject;
 import org.apache.calcite.rel.logical.LogicalSort;
@@ -140,6 +141,10 @@ public RelNode visit(LogicalExchange exchange) {
   public RelNode visit(RelNode other) {
     return visitChildren(other);
   }
+
+  public RelNode visit(LogicalMatch match) {
+    return visitChildren(match);
+  }
 }
 
 // End RelShuttleImpl.java
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
index 4edc4df..0b94b8a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
@@ -24,6 +24,7 @@
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.type.OperandTypes;
 import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlTypeTransforms;
 
 import com.google.common.collect.Sets;
 
@@ -42,9 +43,10 @@
       Sets.newHashSet(YEAR, QUARTER, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND);
 
   private HiveExtractDate(String name) {
-    super(name, SqlKind.EXTRACT, ReturnTypes.INTEGER_NULLABLE, null,
-        OperandTypes.INTERVALINTERVAL_INTERVALDATETIME,
-        SqlFunctionCategory.SYSTEM);
+    super(name, SqlKind.EXTRACT,
+        ReturnTypes.cascade(ReturnTypes.INTEGER, SqlTypeTransforms.FORCE_NULLABLE), null,
+        OperandTypes.INTERVALINTERVAL_INTERVALDATETIME,
+        SqlFunctionCategory.SYSTEM);
   }
 
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
index 38d7906..81de33f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
@@ -21,6 +21,7 @@
 import java.util.List;
 
 import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptMaterializations;
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
@@ -77,7 +78,7 @@ protected void apply(RelOptRuleCall call, Project project, Filter filter, TableS
     // Costing is done in transformTo(), so we call it repeatedly with all applicable
     // materialized views and cheapest one will be picked
     List<RelOptMaterialization> applicableMaterializations =
-        VolcanoPlanner.getApplicableMaterializations(root, materializations);
+        RelOptMaterializations.getApplicableMaterializations(root, materializations);
     for (RelOptMaterialization materialization : applicableMaterializations) {
       List<RelNode> subs = new MaterializedViewSubstitutionVisitor(
           materialization.queryRel, root, relBuilderFactory).go(materialization.tableRel);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
index 69e157e..9bcdd0c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
@@ -165,7 +165,7 @@ public RelOptPredicateList getPredicates(Project project, RelMetadataQuery mq) {
             rexBuilder.makeInputRef(project, expr.i), expr.e));
       }
     }
-    return RelOptPredicateList.of(projectPullUpPredicates);
+    return RelOptPredicateList.of(rexBuilder, projectPullUpPredicates);
   }
 
   /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */
@@ -202,6 +202,7 @@ public RelOptPredicateList getPredicates(Aggregate agg, RelMetadataQuery mq) {
     final RelNode input = agg.getInput();
     final RelOptPredicateList inputInfo = mq.getPulledUpPredicates(input);
     final List<RexNode> aggPullUpPredicates = new ArrayList<>();
+    final RexBuilder rexBuilder = agg.getCluster().getRexBuilder();
     ImmutableBitSet groupKeys = agg.getGroupSet();
     Mapping m = Mappings.create(MappingType.PARTIAL_FUNCTION,
@@ -219,7 +220,7 @@ public RelOptPredicateList getPredicates(Aggregate agg, RelMetadataQuery mq) {
         aggPullUpPredicates.add(r);
       }
     }
-    return RelOptPredicateList.of(aggPullUpPredicates);
+    return RelOptPredicateList.of(rexBuilder, aggPullUpPredicates);
   }
 
   /**
@@ -271,7 +272,7 @@ public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) {
     if (!disjPred.isAlwaysTrue()) {
       preds.add(disjPred);
     }
-    return RelOptPredicateList.of(preds);
+    return RelOptPredicateList.of(rB, preds);
   }
 
   /**
@@ -411,6 +412,7 @@ public RelOptPredicateList inferPredicates(
     final JoinRelType joinType = joinRel.getJoinType();
     final List<RexNode> leftPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(leftChildPredicates));
     final List<RexNode> rightPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(rightChildPredicates));
+    final RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
     switch (joinType) {
     case INNER:
     case LEFT:
@@ -476,13 +478,13 @@ public RelOptPredicateList inferPredicates(
         pulledUpPredicates = Iterables.concat(leftPreds, rightPreds,
             RelOptUtil.conjunctions(joinRel.getCondition()), inferredPredicates);
       }
-      return RelOptPredicateList.of(
+      return RelOptPredicateList.of(rexBuilder,
           pulledUpPredicates, leftInferredPredicates, rightInferredPredicates);
     case LEFT:
-      return RelOptPredicateList.of(
+      return RelOptPredicateList.of(rexBuilder,
           leftPreds, EMPTY_LIST, rightInferredPredicates);
     case RIGHT:
-      return RelOptPredicateList.of(
+      return RelOptPredicateList.of(rexBuilder,
           rightPreds, leftInferredPredicates, EMPTY_LIST);
     default:
       assert inferredPredicates.size() == 0;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index 0dc0c24..a43d2be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -37,6 +37,8 @@
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
 import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
 
 public class ASTBuilder {
 
@@ -269,19 +271,23 @@ public static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral)
       type = ((Boolean) val).booleanValue() ? HiveParser.KW_TRUE : HiveParser.KW_FALSE;
       break;
     case DATE: {
-      val = literal.getValue();
+      //Calcite Calendar is always GMT, Hive atm uses JVM local
+      final Calendar c = (Calendar) literal.getValue();
+      final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
       type = HiveParser.TOK_DATELITERAL;
       DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
-      val = df.format(((Calendar) val).getTime());
+      val = df.format(dt.toDateTime(DateTimeZone.getDefault()).toDate());
       val = "'" + val + "'";
     }
     break;
     case TIME:
     case TIMESTAMP: {
-      val = literal.getValue();
+      //Calcite Calendar is always GMT, Hive atm uses JVM local
+      final Calendar c = (Calendar) literal.getValue();
+      final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
       type = HiveParser.TOK_TIMESTAMPLITERAL;
       DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
-      val = df.format(((Calendar) val).getTime());
+      val = df.format(dt.toDateTime(DateTimeZone.getDefault()).toDate());
       val = "'" + val + "'";
     }
     break;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 27990a2..165f8c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -24,7 +24,6 @@
 import java.util.Map;
 
 import org.apache.calcite.adapter.druid.DruidQuery;
-import org.apache.calcite.avatica.util.TimeUnitRange;
 import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelVisitor;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
index e840938..b1efbbd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
 
 import java.math.BigDecimal;
-import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Calendar;
@@ -75,6 +74,8 @@
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -291,16 +292,17 @@ public ExprNodeDesc visitLiteral(RexLiteral literal) {
       case DOUBLE:
         return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo,
             Double.valueOf(((Number) literal.getValue3()).doubleValue()));
-      case DATE:
+      case DATE: {
+        final Calendar c = (Calendar) literal.getValue();
         return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo,
-            new Date(((Calendar)literal.getValue()).getTimeInMillis()));
+            new java.sql.Date(c.getTimeInMillis()));
+      }
       case TIME:
       case TIMESTAMP: {
-        Object value = literal.getValue3();
-        if (value instanceof Long) {
-          value = new Timestamp((Long)value);
-        }
-        return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value);
+        final Calendar c = (Calendar) literal.getValue();
+        final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
+        return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo,
+            new Timestamp(dt.getMillis()));
       }
       case BINARY:
         return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index a05b89c..10551a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -23,11 +23,12 @@
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
-import java.util.GregorianCalendar;
 import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
+import org.apache.calcite.avatica.util.DateTimeUtils;
 import org.apache.calcite.avatica.util.TimeUnit;
 import org.apache.calcite.avatica.util.TimeUnitRange;
 import org.apache.calcite.plan.RelOptCluster;
@@ -38,8 +39,8 @@
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlCollation;
 import org.apache.calcite.sql.SqlIntervalQualifier;
 import org.apache.calcite.sql.SqlKind;
@@ -76,8 +77,10 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar;
@@ -96,6 +99,8 @@
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
@@ -248,6 +253,8 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
     boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
     boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp &&
         func.getChildren().size() != 0;
+    boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween;
+    boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn;
 
     if (isNumeric) {
       tgtDT = func.getTypeInfo();
@@ -266,15 +273,32 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
     } else if (isTransformableTimeStamp) {
       // unix_timestamp(args) -> to_unix_timestamp(args)
       func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
+    } else if (isBetween) {
+      assert func.getChildren().size() == 4;
+      // We skip the first child, as it is not involved (it is the revert boolean)
+      // The target type needs to account for all 3 operands
+      tgtDT = FunctionRegistry.getCommonClassForComparison(
+          func.getChildren().get(1).getTypeInfo(),
+          FunctionRegistry.getCommonClassForComparison(
+              func.getChildren().get(2).getTypeInfo(),
+              func.getChildren().get(3).getTypeInfo()));
+    } else if (isIN) {
+      assert func.getChildren().size() > 1;
+      tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0)
+          .getTypeInfo(), func.getChildren().get(1).getTypeInfo());
     }
 
-    for (ExprNodeDesc childExpr : func.getChildren()) {
+    for (int i = 0; i < func.getChildren().size(); ++i) {
+      ExprNodeDesc childExpr = func.getChildren().get(i);
       tmpExprNode = childExpr;
       if (tgtDT != null
           && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
-        if (isCompare) {
+        if (isCompare || isBetween || isIN) {
           // For compare, we will convert requisite children
-          tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
+          // For BETWEEN, skip the first child (the revert boolean)
+          if (!isBetween || i > 0) {
+            tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
+          }
         } else if (isNumeric) {
           // For numeric, we'll do minimum necessary cast - if we cast to the type
           // of expression, bad things will happen.
@@ -634,20 +658,33 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx
       calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
       break;
     case DATE:
-      Calendar cal = new GregorianCalendar();
-      cal.setTime((Date) value);
-      calciteLiteral = rexBuilder.makeDateLiteral(cal);
-      break;
-    case TIMESTAMP:
-      Calendar c = null;
-      if (value instanceof Calendar) {
-        c = (Calendar)value;
-      } else {
-        c = Calendar.getInstance();
-        c.setTimeInMillis(((Timestamp)value).getTime());
-      }
-      calciteLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED);
-      break;
+      // The Calcite literal is in GMT, this will be converted back to JVM locale
+      // by ASTBuilder.literal during Calcite->Hive plan conversion
+      final Calendar cal = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
+      cal.setTime((Date) value);
+      calciteLiteral = rexBuilder.makeDateLiteral(cal);
+      break;
+    case TIMESTAMP:
+      // The Calcite literal is in GMT, this will be converted back to JVM locale
+      // by ASTBuilder.literal during Calcite->Hive plan conversion
+      final Calendar calt = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
+      if (value instanceof Calendar) {
+        final Calendar c = (Calendar) value;
+        long timeMs = c.getTimeInMillis();
+        calt.setTimeInMillis(timeMs);
+      } else {
+        final Timestamp ts = (Timestamp) value;
+        calt.setTimeInMillis(ts.getTime());
+      }
+      // Must call makeLiteral, not makeTimestampLiteral,
+      // to have the RexBuilder.roundTime logic kick in
+      calciteLiteral = rexBuilder.makeLiteral(
+          calt,
+          rexBuilder.getTypeFactory().createSqlType(
+              SqlTypeName.TIMESTAMP,
+              rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP)),
+          false);
+      break;
     case INTERVAL_YEAR_MONTH:
       // Calcite year-month literal value is months as BigDecimal
       BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
index 2b075be..877f380 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
@@ -220,7 +220,11 @@ protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReduc
       ExprNodeDesc cexpr = cRSc.getKeyCols().get(i);
       ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
       for (int tag = 0; tag < pRSs.length; tag++) {
-        pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i);
+        final ArrayList<ExprNodeDesc> parentKeyCols = pRSs[tag].getConf().getKeyCols();
+        if (i >= parentKeyCols.size()) {
+          return false;
+        }
+        pexprs[tag] = parentKeyCols.get(i);
       }
       int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted);
       if (found != i) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index bf0a11b..916eb1d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -38,6 +38,7 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -50,10 +51,12 @@
 import org.apache.calcite.adapter.druid.DruidRules;
 import org.apache.calcite.adapter.druid.DruidSchema;
 import org.apache.calcite.adapter.druid.DruidTable;
+import org.apache.calcite.adapter.druid.LocalInterval;
+import org.apache.calcite.config.CalciteConnectionConfigImpl;
+import org.apache.calcite.config.CalciteConnectionProperty;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptMaterialization;
 import org.apache.calcite.plan.RelOptPlanner;
-import org.apache.calcite.plan.RelOptPlanner.Executor;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptSchema;
 import org.apache.calcite.plan.RelOptUtil;
@@ -93,6 +96,7 @@
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rel.type.RelDataTypeImpl;
 import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexExecutor;
 import org.apache.calcite.rex.RexFieldCollation;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
@@ -244,7 +248,6 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.joda.time.Interval;
 
 import com.google.common.base.Function;
 import com.google.common.collect.ArrayListMultimap;
@@ -253,6 +256,7 @@
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Multimap;
+import org.apache.calcite.config.CalciteConnectionConfig;
 
 public class CalcitePlanner extends SemanticAnalyzer {
 
@@ -1298,7 +1302,13 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
           conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
       HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
       HiveRulesRegistry registry = new HiveRulesRegistry();
-      HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, corrScalarRexSQWithAgg);
+      Properties calciteConfigProperties = new Properties();
+      calciteConfigProperties.setProperty(
+          CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(),
+          Boolean.FALSE.toString());
+      CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties);
+      HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig,
+          corrScalarRexSQWithAgg);
       RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
       final RexBuilder rexBuilder = cluster.getRexBuilder();
       final RelOptCluster optCluster = RelOptCluster.create(planner, rexBuilder);
@@ -1323,10 +1333,16 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       }
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation");
 
+      // Create executor
+      RexExecutor executorProvider = new HiveRexExecutorImpl(optCluster);
+      calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider);
+
       // We need to get the ColumnAccessInfo and viewToTableSchema for views.
       HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
           HiveRelFactories.HIVE_BUILDER.create(optCluster, null), this.columnAccessInfo,
           this.viewProjectToTableSchema);
+
+      //basePlan.getCluster().getPlanner().setExecutor(executorProvider);
       fieldTrimmer.trim(calciteGenPlan);
 
       // Create and set MD provider
@@ -1334,9 +1350,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       RelMetadataQuery.THREAD_PROVIDERS.set(
           JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
 
-      // Create executor
-      Executor executorProvider = new HiveRexExecutorImpl(optCluster);
-
       //Remove subquery
       LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
       calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
@@ -1558,7 +1571,7 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
      * executor
      * @return
      */
-    private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, Executor executorProvider) {
+    private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
       // TODO: Decorrelation of subquery should be done before attempting
       // Partition Pruning; otherwise Expression evaluation may try to execute
       // correlated sub query.
@@ -1576,13 +1589,13 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
           "Calcite: HiveProjectOverIntersectRemoveRule and HiveIntersectMerge rules");
 
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-      basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+      basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
          HiveIntersectRewriteRule.INSTANCE);
      perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
          "Calcite: HiveIntersectRewrite rule");
 
      perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-      basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+      basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
          HiveExceptRewriteRule.INSTANCE);
      perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
          "Calcite: HiveExceptRewrite rule");
@@ -1595,7 +1608,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
      // It's not clear if this rewrite is always performant on MR, since extra map phase
      // introduced for 2nd MR job may offset gains of this multi-stage aggregation.
      // We need a cost model for MR to enable this on MR.
-      basePlan = hepPlan(basePlan, true, mdProvider, null, HiveExpandDistinctAggregatesRule.INSTANCE);
+      basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
           "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite");
     }
@@ -1606,7 +1619,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
     // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or
     //     ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.ARBITRARY,
+    basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.ARBITRARY,
        new HivePreFilteringRule(maxCNFNodeCount));
    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
        "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF");
@@ -1661,10 +1674,10 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
          HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE);
      final long reductionTuples = HiveConf.getLongVar(conf,
          HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES);
-      basePlan = hepPlan(basePlan, true, mdProvider, null, HiveSortMergeRule.INSTANCE,
+      basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveSortMergeRule.INSTANCE,
          HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE,
          HiveSortUnionReduceRule.INSTANCE);
-      basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+      basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
          new HiveSortRemoveRule(reductionProportion, reductionTuples),
          HiveProjectSortTransposeRule.INSTANCE);
      perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
@@ -1673,14 +1686,14 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
 
    // 5. Push Down Semi Joins
    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, true, mdProvider, null, SemiJoinJoinTransposeRule.INSTANCE,
+    basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE,
        SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
        "Calcite: Prejoin ordering transformation, Push Down Semi Joins");
 
    // 6. Apply Partition Pruning
    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null, new HivePartitionPruneRule(conf));
+    basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, new HivePartitionPruneRule(conf));
    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
        "Calcite: Prejoin ordering transformation, Partition Pruning");
 
@@ -1695,7 +1708,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
    // 8. Merge, remove and reduce Project if possible
    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null,
+    basePlan = hepPlan(basePlan, false, mdProvider, executorProvider,
        HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE);
    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
        "Calcite: Prejoin ordering transformation, Merge Project-Project");
 
@@ -1705,7 +1718,7 @@
    // storage (in case there are filters on non partition cols). This only
    // matches FIL-PROJ-TS
    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, true, mdProvider, null,
+    basePlan = hepPlan(basePlan, true, mdProvider, executorProvider,
        HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
        HiveProjectFilterPullUpConstantsRule.INSTANCE);
    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
@@ -1725,7 +1738,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
    * @return optimized RelNode
    */
   private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
-      RelMetadataProvider mdProvider, Executor executorProvider, RelOptRule... rules) {
+      RelMetadataProvider mdProvider, RexExecutor executorProvider, RelOptRule... rules) {
     return hepPlan(basePlan, followPlanChanges, mdProvider, executorProvider,
         HepMatchOrder.TOP_DOWN, rules);
   }
@@ -1742,7 +1755,7 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
    * @return optimized RelNode
    */
   private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
-      RelMetadataProvider mdProvider, Executor executorProvider, HepMatchOrder order,
+      RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order,
       RelOptRule... rules) {
     RelNode optimizedRelNode = basePlan;
 
@@ -1769,6 +1782,7 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
     if (executorProvider != null) {
       basePlan.getCluster().getPlanner().setExecutor(executorProvider);
+      planner.setExecutor(executorProvider);
     }
 
     planner.setRoot(basePlan);
@@ -2248,7 +2262,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
         }
         metrics.add(field.getName());
       }
-      List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
+      List<LocalInterval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
       DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
           dataSource, RelDataTypeImpl.proto(rowType), metrics,
           DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
@@ -4149,5 +4163,4 @@ private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
     DRUID,
     NATIVE
   }
-
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
index 7229cc7..4823950 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
@@ -61,7 +61,7 @@ public void testRuleFiredOnlyOnce() {
 
     // Create rules registry to not trigger a rule more than once
     HiveRulesRegistry registry = new HiveRulesRegistry();
-    HivePlannerContext context = new HivePlannerContext(null, registry, null);
+    HivePlannerContext context = new HivePlannerContext(null, registry, null, null);
     HepPlanner planner = new HepPlanner(programBuilder.build(), context);
 
     // Cluster
diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index fe4bc4f..a2f5dbf 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -1023,7 +1023,7 @@ STAGE PLANS: alias: subq1:a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key + 1) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) @@ -1126,7 +1126,7 @@ STAGE PLANS: alias: subq2:a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key + 1) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) diff --git a/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out index 0bb18cc..6d6a38a 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out @@ -28,36 +28,44 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false TableScan alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: key, value, ds - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -203,153 +211,46 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart Truncated Path -> Alias: /src [a] /srcpart/ds=2008-04-08/hr=11 [b] /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(VALUE.ds = '2008-04-08')} + Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key0) > 15.0) and (UDFToDouble(key0) < 25.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: 
NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,key0,value0 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: key, value, key1, value1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns key,value,key1,value1 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -369,8 +270,6 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: FROM src a @@ -384,8 +283,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 17 val_17 17 val_17 17 val_17 17 val_17 @@ -431,18 +328,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) null sort order: a sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column 
stats: NONE tag: 0 value expressions: value (type: string) auto parallelism: false @@ -452,18 +349,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) null sort order: a sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: value (type: string) auto parallelism: false @@ -620,42 +517,38 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,key0,value0 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: key, value, key1, value1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns key,value,key1,value1 + columns.types string:string:string:string + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/constprog2.q.out b/ql/src/test/results/clientpositive/constprog2.q.out index 7bfd0cf..cbc5fd2 100644 --- a/ql/src/test/results/clientpositive/constprog2.q.out +++ b/ql/src/test/results/clientpositive/constprog2.q.out @@ -67,6 +67,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT src1.key, src1.key + 1, src2.value FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND cast(src1.key as double) = 86 @@ -90,28 +91,22 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 86.0) (type: boolean) Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE TableScan alias: src2 Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) = 86.0) (type: boolean) + predicate: (86 = key) (type: boolean) Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col1 Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: @@ -119,17 +114,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col2 + Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), (_col0 + 1) (type: int), _col2 (type: string) + expressions: 86 (type: int), 87 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/druid_basic2.q.out b/ql/src/test/results/clientpositive/druid_basic2.q.out index 
bc9410b..1a5d43d 100644 --- a/ql/src/test/results/clientpositive/druid_basic2.q.out +++ b/ql/src/test/results/clientpositive/druid_basic2.q.out @@ -77,7 +77,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -104,7 +104,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":["delta"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":[],"metrics":["delta"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -135,7 +135,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -166,7 +166,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -212,7 +212,7 @@ STAGE PLANS: alias: druid_table_1 filterExpr: language is 
not null (type: boolean) properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -237,7 +237,7 @@ STAGE PLANS: alias: druid_table_1 filterExpr: language is not null (type: boolean) properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -274,7 +274,7 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select #### A masked pattern was here #### name default.druid_table_1 @@ -300,7 +300,7 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from 
deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select #### A masked pattern was here #### name default.druid_table_1 @@ -399,7 +399,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false @@ -414,7 +414,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -442,7 +442,7 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select #### A masked pattern was here #### name default.druid_table_1 @@ -468,7 +468,7 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select #### A masked pattern was here #### name default.druid_table_1 @@ -545,8 +545,7 @@ LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -554,27 +553,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: druid_table_1 + properties: + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Select Operator - expressions: robot (type: string), language (type: string), __time (type: timestamp), added (type: float), delta (type: float) - outputColumnNames: robot, language, __time, added, delta + expressions: robot (type: string), __time (type: timestamp), $f3 (type: float), $f4 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: max(added), sum(delta) - keys: robot (type: string), language (type: string), floor_day(__time) (type: timestamp) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int), _col2 (type: float) 
+ null sort order: az + sort order: +- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp) - null sort order: aaa - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - tag: -1 - value expressions: _col3 (type: float), _col4 (type: double) - auto parallelism: false + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: timestamp), _col3 (type: float) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -592,6 +589,8 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -616,6 +615,8 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -634,81 +635,8 @@ STAGE PLANS: /druid_table_1 [druid_table_1] Needs Tagging: false Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: float), _col4 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
properties: - column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,timestamp,float,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: UDFToInteger(_col0) (type: int), _col2 (type: float) - null sort order: az - sort order: +- - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: timestamp), _col3 (type: double) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,timestamp,float,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,timestamp,float,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: double) + expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: float) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Limit @@ -726,7 +654,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types string:timestamp:float:double + columns.types string:timestamp:float:float escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/druid_intervals.q.out b/ql/src/test/results/clientpositive/druid_intervals.q.out index ca3febf..dc520b7 100644 --- a/ql/src/test/results/clientpositive/druid_intervals.q.out +++ b/ql/src/test/results/clientpositive/druid_intervals.q.out @@ -79,7 +79,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -109,7 +109,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/2012-03-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/2012-03-01T08:00:00.000"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -139,7 +139,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2012-03-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000/2012-03-01T08:00:00.001"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -171,7 +171,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2011-01-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000/2011-01-01T08:00:00.000"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -191,24 +191,40 @@ FROM druid_table_1 WHERE `__time` BETWEEN '2010-01-01 00:00:00' AND '2011-01-01 00:00:00' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: druid_table_1 + filterExpr: __time BETWEEN 2010-01-01 00:00:00.0 AND 2011-01-01 00:00:00.0 (type: boolean) + properties: + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: __time BETWEEN 2010-01-01 00:00:00.0 AND 2011-01-01 00:00:00.0 (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: __time (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2011-01-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink + ListSink PREHOOK: query: EXPLAIN SELECT `__time` @@ -233,7 +249,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2011-01-01T00:00:00.001Z","2012-01-01T00:00:00.000Z/2013-01-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":[],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -265,7 +281,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2012-01-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":[],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -285,24 +301,40 @@ FROM druid_table_1 WHERE `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: druid_table_1 + filterExpr: (__time) IN (2010-01-01 00:00:00.0, 2011-01-01 00:00:00.0) (type: boolean) + properties: + druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (__time) IN (2010-01-01 00:00:00.0, 2011-01-01 00:00:00.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: __time (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z","2011-01-01T00:00:00.000Z/2011-01-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink + ListSink PREHOOK: query: EXPLAIN SELECT `__time`, robot @@ -315,24 +347,40 @@ FROM druid_table_1 WHERE robot = 'user1' AND `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: druid_table_1 + filterExpr: ((__time) IN (2010-01-01 00:00:00.0, 2011-01-01 00:00:00.0) and (robot = 'user1')) (type: boolean) + properties: + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((__time) IN (2010-01-01 00:00:00.0, 2011-01-01 00:00:00.0) and (robot = 'user1')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: __time (type: timestamp), 'user1' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z","2011-01-01T00:00:00.000Z/2011-01-01T00:00:00.001Z"],"filter":{"type":"selector","dimension":"robot","value":"user1"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp), 'user1' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink + ListSink PREHOOK: query: EXPLAIN SELECT `__time`, robot @@ -354,13 +402,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: druid_table_1 - filterExpr: ((__time) IN ('2010-01-01 00:00:00', '2011-01-01 00:00:00') or (robot = 'user1')) (type: boolean) + filterExpr: ((__time) IN (2010-01-01 00:00:00.0, 2011-01-01 00:00:00.0) or (robot = 'user1')) (type: boolean) properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((__time) IN ('2010-01-01 00:00:00', '2011-01-01 00:00:00') or (robot = 'user1')) (type: boolean) + predicate: ((__time) IN (2010-01-01 00:00:00.0, 2011-01-01 00:00:00.0) or (robot = 'user1')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp), robot (type: string) diff --git a/ql/src/test/results/clientpositive/druid_timeseries.q.out b/ql/src/test/results/clientpositive/druid_timeseries.q.out index 6b2ffe9..4ecf67f 100644 --- a/ql/src/test/results/clientpositive/druid_timeseries.q.out +++ b/ql/src/test/results/clientpositive/druid_timeseries.q.out @@ -79,11 +79,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"longMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"doubleMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: $f0 (type: bigint), $f1 (type: float) + expressions: $f0 (type: float), $f1 (type: float) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -109,11 +109,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"NONE","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"none","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -139,11 +139,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"YEAR","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"year","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -169,11 +169,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"QUARTER","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"quarter","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -199,11 +199,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"MONTH","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"month","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -229,11 +229,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"WEEK","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"week","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -259,11 +259,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"DAY","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"day","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -289,11 +289,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"HOUR","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"hour","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -319,11 +319,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"MINUTE","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"minute","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -349,11 +349,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"SECOND","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"second","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -381,11 +381,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"HOUR","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"hour","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) + expressions: __time (type: timestamp), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -418,7 +418,7 @@ STAGE PLANS: alias: druid_table_1 filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean) properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -495,7 +495,7 @@ STAGE PLANS: alias: druid_table_1 filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean) properties: - druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator diff --git a/ql/src/test/results/clientpositive/druid_topn.q.out b/ql/src/test/results/clientpositive/druid_topn.q.out index 57d6586..387c4c3 100644 --- a/ql/src/test/results/clientpositive/druid_topn.q.out +++ b/ql/src/test/results/clientpositive/druid_topn.q.out @@ -85,11 +85,11 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"all","dimension":"robot","metric":"$f1","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100} - druid.query.type topN + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f1","direction":"descending"}]},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: robot (type: string), $f1 (type: bigint), $f2 (type: float) + expressions: robot (type: string), $f1 (type: float), $f2 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -109,24 +109,54 @@ ORDER BY s DESC LIMIT 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: druid_table_1 + properties: + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"none","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: __time (type: timestamp), robot (type: string), $f2 (type: float), $f3 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: float) + sort order: - + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TopN Hash Memory 
Usage: 0.1
+                value expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: float)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: timestamp), VALUE._col1 (type: string), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 100
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: string), _col0 (type: timestamp), _col2 (type: float), _col3 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"NONE","dimension":"robot","metric":"$f3","aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100}
-            druid.query.type topN
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink

PREHOOK: query: EXPLAIN
SELECT robot, floor_year(`__time`), max(added), sum(variation) as s
@@ -143,24 +173,50 @@ ORDER BY s DESC
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"year","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: robot (type: string), __time (type: timestamp), $f2 (type: float), $f3 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col3 (type: float)
+                sort order: -
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: string), _col1 (type: timestamp), _col2 (type: float)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
  Stage: Stage-0
    Fetch Operator
-      limit: -1
+      limit: 10
      Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"YEAR","dimension":"robot","metric":"$f3","aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":10}
-            druid.query.type topN
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink

PREHOOK: query: EXPLAIN
SELECT robot, floor_month(`__time`), max(added), sum(variation) as s
@@ -177,24 +233,50 @@ ORDER BY s
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: robot (type: string), __time (type: timestamp), $f2 (type: float), $f3 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col3 (type: float)
+                sort order: +
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: string), _col1 (type: timestamp), _col2 (type: float)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
  Stage: Stage-0
    Fetch Operator
-      limit: -1
+      limit: 10
      Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending"}]},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-            druid.query.type groupBy
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink

PREHOOK: query: EXPLAIN
SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s
@@ -211,24 +293,54 @@ ORDER BY s DESC, m DESC
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot","namespace"],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: robot (type: string), __time (type: timestamp), $f3 (type: float), $f4 (type: float)
+              outputColumnNames: _col0, _col2, _col3, _col4
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col4 (type: float), _col3 (type: float)
+                sort order: --
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: string), _col2 (type: timestamp)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col2 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: float), _col4 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot","namespace"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending"},{"dimension":"$f3","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-            druid.query.type groupBy
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink

PREHOOK: query: EXPLAIN
SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s
@@ -245,24 +357,54 @@ ORDER BY robot ASC, m DESC
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot","namespace"],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: robot (type: string), __time (type: timestamp), $f3 (type: float), $f4 (type: float)
+              outputColumnNames: _col0, _col2, _col3, _col4
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col3 (type: float)
+                sort order: +-
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col2 (type: timestamp), _col4 (type: float)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: float)
+          outputColumnNames: _col0, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: float), _col4 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot","namespace"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending"},{"dimension":"$f3","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-            druid.query.type groupBy
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink

PREHOOK: query: EXPLAIN
SELECT robot, floor_year(`__time`), max(added), sum(variation) as s
@@ -281,24 +423,54 @@ ORDER BY s
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"year","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
+              druid.query.type timeseries
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: __time (type: timestamp), $f1_0 (type: float), $f2 (type: float)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: float)
+                sort order: +
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: timestamp), _col1 (type: float)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: timestamp), VALUE._col1 (type: float), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: '1' (type: string), _col0 (type: timestamp), _col1 (type: float), _col2 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"YEAR","dimensions":[],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f2","direction":"ascending"}]},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-            druid.query.type groupBy
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: '1' (type: string), __time (type: timestamp), $f1_0 (type: bigint), $f2 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink

PREHOOK: query: EXPLAIN
SELECT robot, floor_hour(`__time`), max(added) as m, sum(variation)
@@ -333,7 +505,7 @@ STAGE PLANS:
            alias: druid_table_1
            filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean)
            properties:
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
              druid.query.type select
            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Filter Operator
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index 8c6f0f1..01d60f7 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -321,7 +321,7 @@ STAGE PLANS:
          outputColumnNames: _col0, _col1, _col2, _col4
          Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-            predicate: (((_col2 + _col4) > 2.0) or ((_col1 + 1) > 2)) (type: boolean)
+            predicate: (((_col2 + _col4) > 2) or ((_col1 + 1) > 2)) (type: boolean)
            Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: string)
diff --git a/ql/src/test/results/clientpositive/fouter_join_ppr.q.out b/ql/src/test/results/clientpositive/fouter_join_ppr.q.out
index 1897aa7..2dfcc18 100644
--- a/ql/src/test/results/clientpositive/fouter_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/fouter_join_ppr.q.out
@@ -28,36 +28,44 @@ STAGE PLANS:
            alias: a
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
-            Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                null sort order: a
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                tag: 0
-                value expressions: _col1 (type: string)
-                auto parallelism: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                  tag: 0
+                  value expressions: _col1 (type: string)
+                  auto parallelism: false
          TableScan
            alias: b
-            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
-            Select Operator
-              expressions: key (type: string), value (type: string), ds (type: string)
-              outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                null sort order: a
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                tag: 1
-                value expressions: _col1 (type: string), _col2 (type: string)
-                auto parallelism: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  tag: 1
+                  value expressions: _col1 (type: string)
+                  auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
@@ -203,149 +211,42 @@ STAGE PLANS:
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.srcpart
            name: default.srcpart
-#### A masked pattern was here ####
-          Partition
-            base file name: hr=11
-            input format: org.apache.hadoop.mapred.TextInputFormat
-            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-            partition values:
-              ds 2008-04-09
-              hr 11
-            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-              bucket_count -1
-              column.name.delimiter ,
-              columns key,value
-              columns.comments 'default','default'
-              columns.types string:string
-#### A masked pattern was here ####
-              name default.srcpart
-              numFiles 1
-              numRows 500
-              partition_columns ds/hr
-              partition_columns.types string:string
-              rawDataSize 5312
-              serialization.ddl struct srcpart { string key, string value}
-              serialization.format 1
-              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 5812
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              properties:
-                bucket_count -1
-                column.name.delimiter ,
-                columns key,value
-                columns.comments 'default','default'
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.srcpart
-                partition_columns ds/hr
-                partition_columns.types string:string
-                serialization.ddl struct srcpart { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.srcpart
-            name: default.srcpart
-#### A masked pattern was here ####
-          Partition
-            base file name: hr=12
-            input format: org.apache.hadoop.mapred.TextInputFormat
-            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-            partition values:
-              ds 2008-04-09
-              hr 12
-            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-              bucket_count -1
-              column.name.delimiter ,
-              columns key,value
-              columns.comments 'default','default'
-              columns.types string:string
-#### A masked pattern was here ####
-              name default.srcpart
-              numFiles 1
-              numRows 500
-              partition_columns ds/hr
-              partition_columns.types string:string
-              rawDataSize 5312
-              serialization.ddl struct srcpart { string key, string value}
-              serialization.format 1
-              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 5812
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              properties:
-                bucket_count -1
-                column.name.delimiter ,
-                columns key,value
-                columns.comments 'default','default'
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.srcpart
-                partition_columns ds/hr
-                partition_columns.types string:string
-                serialization.ddl struct srcpart { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.srcpart
-            name: default.srcpart
      Truncated Path -> Alias:
        /src [$hdt$_0:a]
        /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b]
        /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b]
-        /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b]
-        /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b]
      Needs Tagging: true
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
-          filter mappings:
-            1 [0, 1]
-          filter predicates:
-            0
-            1 {(VALUE._col1 = '2008-04-08')}
+               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-          Filter Operator
-            isSamplingPred: false
-            predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0) and (UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean)
-            Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-#### A masked pattern was here ####
-              NumFilesPerFileSink: 1
-              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  properties:
-                    columns _col0,_col1,_col2,_col3
-                    columns.types string:string:string:string
-                    escape.delim \
-                    hive.serialization.extend.additional.nesting.levels true
-                    serialization.escape.crlf true
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              TotalFiles: 1
-              GatherStats: false
-              MultiFileSpray: false
+          Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types string:string:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
@@ -365,8 +266,6 @@ PREHOOK: Input: default@src
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: FROM src a
@@ -380,8 +279,6 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
17	val_17	17	val_17
17	val_17	17	val_17
@@ -423,38 +320,46 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: a
-            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
-            Select Operator
-              expressions: key (type: string), value (type: string), ds (type: string)
-              outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                null sort order: a
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                tag: 0
-                value expressions: _col1 (type: string), _col2 (type: string)
-                auto parallelism: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  tag: 0
+                  value expressions: _col1 (type: string)
+                  auto parallelism: false
          TableScan
            alias: b
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
-            Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                null sort order: a
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                tag: 1
-                value expressions: _col1 (type: string)
-                auto parallelism: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                  tag: 1
+                  value expressions: _col1 (type: string)
+                  auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
@@ -600,153 +505,46 @@ STAGE PLANS:
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.srcpart
            name: default.srcpart
-#### A masked pattern was here ####
-          Partition
-            base file name: hr=11
-            input format: org.apache.hadoop.mapred.TextInputFormat
-            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-            partition values:
-              ds 2008-04-09
-              hr 11
-            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-              bucket_count -1
-              column.name.delimiter ,
-              columns key,value
-              columns.comments 'default','default'
-              columns.types string:string
-#### A masked pattern was here ####
-              name default.srcpart
-              numFiles 1
-              numRows 500
-              partition_columns ds/hr
-              partition_columns.types string:string
-              rawDataSize 5312
-              serialization.ddl struct srcpart { string key, string value}
-              serialization.format 1
-              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 5812
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              properties:
-                bucket_count -1
-                column.name.delimiter ,
-                columns key,value
-                columns.comments 'default','default'
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.srcpart
-                partition_columns ds/hr
-                partition_columns.types string:string
-                serialization.ddl struct srcpart { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.srcpart
-            name: default.srcpart
-#### A masked pattern was here ####
-          Partition
-            base file name: hr=12
-            input format: org.apache.hadoop.mapred.TextInputFormat
-            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-            partition values:
-              ds 2008-04-09
-              hr 12
-            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-              bucket_count -1
-              column.name.delimiter ,
-              columns key,value
-              columns.comments 'default','default'
-              columns.types string:string
-#### A masked pattern was here ####
-              name default.srcpart
-              numFiles 1
-              numRows 500
-              partition_columns ds/hr
-              partition_columns.types string:string
-              rawDataSize 5312
-              serialization.ddl struct srcpart { string key, string value}
-              serialization.format 1
-              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 5812
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              properties:
-                bucket_count -1
-                column.name.delimiter ,
-                columns key,value
-                columns.comments 'default','default'
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.srcpart
-                partition_columns ds/hr
-                partition_columns.types string:string
-                serialization.ddl struct srcpart { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.srcpart
-            name: default.srcpart
      Truncated Path -> Alias:
        /src [$hdt$_1:b]
        /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:a]
        /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:a]
-        /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:a]
-        /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:a]
      Needs Tagging: true
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
-          filter mappings:
-            0 [1, 1]
-          filter predicates:
-            0 {(VALUE._col1 = '2008-04-08')}
-            1
+               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col3, _col4
-          Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-          Filter Operator
-            isSamplingPred: false
-            predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0) and (UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean)
-            Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                GlobalTableId: 0
-#### A masked pattern was here ####
-                NumFilesPerFileSink: 1
-                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    properties:
-                      columns _col0,_col1,_col2,_col3
-                      columns.types string:string:string:string
-                      escape.delim \
-                      hive.serialization.extend.additional.nesting.levels true
-                      serialization.escape.crlf true
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                TotalFiles: 1
-                GatherStats: false
-                MultiFileSpray: false
+          Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types string:string:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
@@ -766,8 +564,6 @@ PREHOOK: Input: default@src
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: FROM srcpart a
@@ -781,8 +577,6 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
17	val_17	17	val_17
17	val_17	17	val_17
@@ -828,18 +622,18 @@ STAGE PLANS:
            GatherStats: false
            Filter Operator
              isSamplingPred: false
-              predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  null sort order: a
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                  tag: 0
                  value expressions: _col1 (type: string)
                  auto parallelism: false
@@ -849,18 +643,18 @@ STAGE PLANS:
            GatherStats: false
            Filter Operator
              isSamplingPred: false
-              predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  null sort order: a
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                  tag: 1
                  value expressions: _col1 (type: string)
                  auto parallelism: false
@@ -1017,38 +811,34 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Right Outer Join0 to 1
+               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
-          Filter Operator
-            isSamplingPred: false
-            predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
-            Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-#### A masked pattern was here ####
-              NumFilesPerFileSink: 1
-              Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  properties:
-                    columns _col0,_col1,_col2,_col3
-                    columns.types string:string:string:string
-                    escape.delim \
-                    hive.serialization.extend.additional.nesting.levels true
-                    serialization.escape.crlf true
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              TotalFiles: 1
-              GatherStats: false
-              MultiFileSpray: false
+          Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types string:string:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
@@ -1126,18 +916,18 @@ STAGE PLANS:
            GatherStats: false
            Filter Operator
              isSamplingPred: false
-              predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  null sort order: a
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                  tag: 0
                  value expressions: _col1 (type: string)
                  auto parallelism: false
@@ -1147,18 +937,18 @@ STAGE PLANS:
            GatherStats: false
            Filter Operator
              isSamplingPred: false
-              predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+              Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  null sort order: a
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                  tag: 1
                  value expressions: _col1 (type: string)
                  auto parallelism: false
@@ -1315,42 +1105,38 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Left Outer Join0 to 1
+               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col3, _col4
-          Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
-          Filter Operator
-            isSamplingPred: false
-            predicate: ((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean)
-            Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                GlobalTableId: 0
-#### A masked pattern was here ####
-                NumFilesPerFileSink: 1
-                Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    properties:
-                      columns _col0,_col1,_col2,_col3
-                      columns.types string:string:string:string
-                      escape.delim \
-                      hive.serialization.extend.additional.nesting.levels true
-                      serialization.escape.crlf true
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                TotalFiles: 1
-                GatherStats: false
-                MultiFileSpray: false
+          Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types string:string:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
diff --git a/ql/src/test/results/clientpositive/index_auto_unused.q.out b/ql/src/test/results/clientpositive/index_auto_unused.q.out
index 8a270d6..cff5313 100644
--- a/ql/src/test/results/clientpositive/index_auto_unused.q.out
+++ b/ql/src/test/results/clientpositive/index_auto_unused.q.out
@@ -340,37 +340,25 @@ PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN SELECT * FROM srcpart WHERE ds='2008-04-09' AND hr=12 AND key < 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage

STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: srcpart
-            filterExpr: ((ds = '2008-04-09') and (12.0 = 12.0) and (UDFToDouble(key) < 10.0)) (type: boolean)
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (UDFToDouble(key) < 10.0) (type: boolean)
-              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string), '2008-04-09' (type: string), hr (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
-        ListSink
+        TableScan
+          alias: srcpart
+          filterExpr: ((ds = '2008-04-09') and (12.0 = 12.0) and (UDFToDouble(key) < 10.0)) (type: boolean)
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (UDFToDouble(key) < 10.0) (type: boolean)
+            Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), '2008-04-09' (type: string), hr (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              ListSink

PREHOOK: query: SELECT * FROM srcpart WHERE ds='2008-04-09' AND hr=12 AND key < 10
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/join45.q.out b/ql/src/test/results/clientpositive/join45.q.out
index 249fe9c..4932a76 100644
--- a/ql/src/test/results/clientpositive/join45.q.out
+++ b/ql/src/test/results/clientpositive/join45.q.out
@@ -33,7 +33,7 @@ STAGE PLANS:
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-                predicate: _col1 BETWEEN 100 AND 102 (type: boolean)
+                predicate: UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 (type: boolean)
                Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
@@ -52,7 +52,7 @@ STAGE PLANS:
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-                predicate: _col1 BETWEEN 100 AND 102 (type: boolean)
+                predicate: UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 (type: boolean)
                Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
@@ -243,7 +243,7 @@ STAGE PLANS:
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-                predicate: _col1 BETWEEN 100 AND 102 (type: boolean)
+                predicate: UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 (type: boolean)
                Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
@@ -257,7 +257,7 @@ STAGE PLANS:
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-                predicate: _col1 BETWEEN 100 AND 102 (type: boolean)
+                predicate: UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 (type: boolean)
                Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
@@ -367,7 +367,7 @@ STAGE PLANS:
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-            predicate: ((_col0 = _col2) or _col1 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102) (type: boolean)
+            predicate: ((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 or UDFToDouble(_col3) BETWEEN 100.0 AND 102.0) (type: boolean)
            Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE
            Limit
              Number of rows: 10
diff --git a/ql/src/test/results/clientpositive/join_merging.q.out b/ql/src/test/results/clientpositive/join_merging.q.out
index 79a7023..ef9ca18 100644
--- a/ql/src/test/results/clientpositive/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/join_merging.q.out
@@ -109,7 +109,7 @@ STAGE PLANS:
            alias: p1
            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-              predicate: (p_size > 10) (type: boolean)
+              predicate: ((p_size > 10) and p_partkey is not null) (type: boolean)
              Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: p_partkey (type: int), p_size (type: int)
@@ -124,20 +124,23 @@ STAGE PLANS:
          TableScan
            alias: p2
            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: p_partkey (type: int), p_size (type: int)
-              outputColumnNames: _col0, _col1
+            Filter Operator
+              predicate: p_partkey is not null (type: boolean)
              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: int)
+              Select Operator
+                expressions: p_partkey (type: int), p_size (type: int)
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: int)
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int)
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Left Outer Join0 to 1
+               Inner Join 0 to 1
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)
diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
index a867bd2..841ef14 100644
--- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
@@ -1044,7 +1044,7 @@ STAGE PLANS:
                  alias: a
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                    predicate: (key + 1) is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (key + 1) (type: int)
@@ -1063,7 +1063,7 @@ STAGE PLANS:
                  alias: a
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                    predicate: (key + 1) is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (key + 1) (type: int)
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
index c08a534..b69d0bd 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
@@ -1261,7 +1261,7 @@ STAGE PLANS:
                  alias: a
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                    predicate: (key + 1) is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (key + 1) (type: int)
@@ -1293,7 +1293,7 @@ STAGE PLANS:
                  alias: a
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                    predicate: (key + 1) is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (key + 1) (type: int)
diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
index 98d2328..4bdb186 100644
--- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
@@ -274,7 +274,7 @@ Stage-0
            Select Operator [SEL_5] (rows=2 width=3)
              Output:["_col0","_col1"]
              Filter Operator [FIL_16] (rows=2 width=3)
-                predicate:((id = 100) and (id = 100) is not null)
+                predicate:(id = 100)
                TableScan [TS_3] (rows=5 width=3)
                  default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -311,7 +311,7 @@ Stage-0
            Select Operator [SEL_2] (rows=5 width=20)
              Output:["_col0","_col1","_col2"]
              Filter Operator [FIL_15] (rows=5 width=20)
-                predicate:((dimid = 100) and (dimid = 100) is not null)
+                predicate:(dimid = 100)
                TableScan [TS_0] (rows=10 width=20)
                  default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
        <-Map 3 [SIMPLE_EDGE] llap
@@ -322,7 +322,7 @@ Stage-0
            Select Operator [SEL_5] (rows=2 width=3)
              Output:["_col0","_col1"]
              Filter Operator [FIL_16] (rows=2 width=3)
-                predicate:((id = 100) and (id = 100) is not null)
+                predicate:(id = 100)
                TableScan [TS_3] (rows=5 width=3)
                  default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -372,7 +372,7 @@ Stage-0
            Select Operator [SEL_5] (rows=2 width=3)
              Output:["_col0","_col1"]
              Filter Operator [FIL_16] (rows=2 width=3)
-                predicate:((id = 100) and (id = 100) is not null)
+                predicate:(id = 100)
                TableScan [TS_3] (rows=5 width=3)
                  default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -409,7 +409,7 @@ Stage-0
            Select Operator [SEL_2] (rows=5 width=20)
              Output:["_col0","_col1","_col2"]
              Filter Operator [FIL_15] (rows=5 width=20)
-                predicate:((dimid = 100) and (dimid = 100) is not null)
+                predicate:(dimid = 100)
                TableScan [TS_0] (rows=10 width=20)
                  default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
        <-Map 3 [SIMPLE_EDGE] llap
@@ -420,7 +420,7 @@ Stage-0
            Select Operator [SEL_5] (rows=2 width=3)
              Output:["_col0","_col1"]
              Filter Operator [FIL_16] (rows=2 width=3)
-                predicate:((id = 100) and (id = 100) is not null)
+                predicate:(id = 100)
                TableScan [TS_3] (rows=5 width=3)
                  default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -459,7 +459,7 @@ Stage-0
            Select Operator [SEL_2] (rows=5 width=20)
              Output:["_col0","_col1","_col2"]
              Filter Operator [FIL_15] (rows=5 width=20)
-                predicate:((dimid = 100) and (dimid = 100) is not null)
+                predicate:(dimid = 100)
                TableScan [TS_0] (rows=10 width=20)
                  default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
        <-Map 3 [SIMPLE_EDGE] llap
@@ -470,7 +470,7 @@ Stage-0
            Select Operator [SEL_5] (rows=2 width=3)
              Output:["_col0","_col1"]
              Filter Operator [FIL_16] (rows=2 width=3)
-                predicate:((id = 100) and (id = 100) is not null)
+                predicate:(id = 100)
                TableScan [TS_3] (rows=5 width=3)
                  default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
index 5f75977..feadca3 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
@@ -105,10 +105,10 @@ STAGE PLANS:
                Map Operator Tree:
                    TableScan
                      alias: tt2
-                      filterExpr: ((timestamp_col_18 = timestamp_col_18) and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_18_min) AND DynamicValue(RS_23_t1_timestamp_col_18_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_18_bloom_filter)))) (type: boolean)
+                      filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_18_min) AND DynamicValue(RS_23_t1_timestamp_col_18_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_18_bloom_filter)))) (type: boolean)
                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                      Filter Operator
-                        predicate: ((timestamp_col_18 = timestamp_col_18) and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_18_min) AND DynamicValue(RS_23_t1_timestamp_col_18_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_18_bloom_filter)))) (type: boolean)
+                        predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_18_min) AND DynamicValue(RS_23_t1_timestamp_col_18_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_18_bloom_filter)))) (type: boolean)
                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                        Select Operator
                          expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp)
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 53839a3..285ffa0 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -474,14 +474,14 @@ Stage-0
                Output:["_col6","_col2"]
                Filter Operator [FIL_30] (rows=1 width=16)
                  predicate:(((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0))
-                  Merge Join Operator [MERGEJOIN_48] (rows=3 width=16)
+                  Merge Join Operator [MERGEJOIN_49] (rows=3 width=16)
                    Conds:RS_27._col0=RS_28._col0(Inner),Output:["_col1","_col2","_col6"]
                  <-Map 10 [SIMPLE_EDGE] llap
                    SHUFFLE [RS_28]
                      PartitionCols:_col0
                      Select Operator [SEL_26] (rows=18 width=84)
                        Output:["_col0","_col1"]
-                        Filter Operator [FIL_46] (rows=18 width=84)
+                        Filter Operator [FIL_47] (rows=18 width=84)
                          predicate:((c_int > 0) and key is not null)
                          TableScan [TS_24] (rows=20 width=84)
                            default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"]
@@ -492,8 +492,8 @@ Stage-0
                        Output:["_col0","_col1","_col2"]
                        Filter Operator [FIL_22] (rows=1 width=101)
                          predicate:((_col1 + _col4) >= 0)
-                          Merge Join Operator [MERGEJOIN_47] (rows=1 width=101)
-                            Conds:RS_19._col0=RS_20._col0(Left Outer),Output:["_col0","_col1","_col2","_col4"]
+                          Merge Join Operator [MERGEJOIN_48] (rows=1 width=101)
+                            Conds:RS_19._col0=RS_20._col0(Inner),Output:["_col0","_col1","_col2","_col4"]
                          <-Reducer 3 [SIMPLE_EDGE] llap
                            SHUFFLE [RS_19]
                              PartitionCols:_col0
@@ -510,8 +510,8 @@ Stage-0
                                      PartitionCols:_col0, _col1, _col2
                                      Group By Operator [GBY_3] (rows=1 width=101)
                                        Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
-                                        Filter Operator [FIL_44] (rows=1 width=93)
-                                          predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and key is not null)
+                                        Filter Operator [FIL_45] (rows=1 width=93)
+                                          predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null)
                                          TableScan [TS_0] (rows=20 width=88)
                                            default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -526,8 +526,8 @@ Stage-0
                                      PartitionCols:_col0, _col1, _col2
                                      Group By Operator [GBY_14] (rows=1 width=93)
                                        Output:["_col0","_col1","_col2"],keys:key, c_int, c_float
-                                        Filter Operator [FIL_45] (rows=1 width=93)
-                                          predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and key is not null)
+                                        Filter Operator [FIL_46] (rows=1 width=93)
+                                          predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null)
                                          TableScan [TS_11] (rows=20 width=88)
                                            default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -540,74 +540,86 @@ Plan optimized by CBO.
Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 8 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 5 <- Map 9 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE)

Stage-0
  Fetch Operator
    limit:-1
    Stage-1
-      Reducer 5 llap
-      File Output Operator [FS_31]
-        Select Operator [SEL_30] (rows=1 width=20)
+      Reducer 6 llap
+      File Output Operator [FS_36]
+        Select Operator [SEL_35] (rows=1 width=20)
          Output:["_col0","_col1","_col2"]
-          Group By Operator [GBY_29] (rows=1 width=20)
+          Group By Operator [GBY_34] (rows=1 width=20)
            Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
-          <-Reducer 4 [SIMPLE_EDGE] llap
-            SHUFFLE [RS_28]
+          <-Reducer 5 [SIMPLE_EDGE] llap
+            SHUFFLE [RS_33]
              PartitionCols:_col0, _col1
-              Group By Operator [GBY_27] (rows=1 width=20)
+              Group By Operator [GBY_32] (rows=1 width=20)
                Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6
-                Select Operator [SEL_26] (rows=1 width=20)
+                Select Operator [SEL_31] (rows=3 width=16)
                  Output:["_col2","_col6"]
-                  Filter Operator [FIL_25] (rows=1 width=20)
-                    predicate:(((_col1 + _col4) >= 2) and ((_col1 > 0) or (_col6 >= 0)))
-                    Merge Join Operator [MERGEJOIN_36] (rows=4 width=20)
-                      Conds:RS_21._col0=RS_22._col0(Right Outer),RS_21._col0=RS_23._col0(Right Outer),Output:["_col1","_col2","_col4","_col6"]
-                    <-Map 8 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_23]
+                  Filter Operator [FIL_30] (rows=3 width=16)
+                    predicate:((_col1 > 0) or (_col6 >= 0))
+                    Merge Join Operator [MERGEJOIN_46] (rows=3 width=16)
+                      Conds:RS_27._col0=RS_28._col0(Inner),Output:["_col1","_col2","_col6"]
+                    <-Map 9 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_28]
                        PartitionCols:_col0
-                        Select Operator [SEL_20] (rows=20 width=84)
+                        Select Operator [SEL_26] (rows=18 width=84)
                          Output:["_col0","_col1"]
-                          TableScan [TS_19] (rows=20 width=84)
-                            default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"]
-                    <-Reducer 3 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_21]
+                          Filter Operator [FIL_44] (rows=18 width=84)
+                            predicate:key is not null
+                            TableScan [TS_24] (rows=20 width=84)
+                              default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"]
+                    <-Reducer 4 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_27]
                        PartitionCols:_col0
-                        Select Operator [SEL_9] (rows=1 width=97)
+                        Select Operator [SEL_23] (rows=1 width=101)
                          Output:["_col0","_col1","_col2"]
-                        <-Reducer 2 [SIMPLE_EDGE] llap
-                          SHUFFLE [RS_8]
-                            Select Operator [SEL_6] (rows=1 width=105)
-                              Output:["_col0","_col1","_col2","_col3"]
-                              Group By Operator [GBY_5] (rows=1 width=101)
-                                Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
-                              <-Map 1 [SIMPLE_EDGE] llap
-                                SHUFFLE [RS_4]
-                                  PartitionCols:_col0, _col1, _col2
-                                  Group By Operator [GBY_3] (rows=1 width=101)
-                                    Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
-                                    Filter Operator [FIL_34] (rows=1 width=93)
-                                      predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0))
-                                      TableScan [TS_0] (rows=20 width=88)
-                                        default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
-                    <-Reducer 7 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_22]
-                        PartitionCols:_col0
-                        Select Operator [SEL_17] (rows=1 width=89)
-                          Output:["_col0","_col1"]
-                          Group By Operator [GBY_16] (rows=1 width=93)
-                            Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2
-                          <-Map 6 [SIMPLE_EDGE] llap
-                            SHUFFLE [RS_15]
-                              PartitionCols:_col0, _col1, _col2
-                              Group By Operator [GBY_14] (rows=1 width=93)
-                                Output:["_col0","_col1","_col2"],keys:key, c_int, c_float
-                                Filter Operator [FIL_35] (rows=1 width=93)
-                                  predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0))
-                                  TableScan [TS_11] (rows=20 width=88)
-                                    default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
+                          Filter Operator [FIL_22] (rows=1 width=101)
+                            predicate:((_col1 + _col4) >= 2)
+                            Merge Join Operator [MERGEJOIN_45] (rows=1 width=101)
+                              Conds:RS_19._col0=RS_20._col0(Inner),Output:["_col0","_col1","_col2","_col4"]
+                            <-Reducer 3 [SIMPLE_EDGE] llap
+                              SHUFFLE [RS_19]
+                                PartitionCols:_col0
+                                Select Operator [SEL_9] (rows=1 width=97)
+                                  Output:["_col0","_col1","_col2"]
+                                <-Reducer 2 [SIMPLE_EDGE] llap
+                                  SHUFFLE [RS_8]
+                                    Select Operator [SEL_6] (rows=1 width=105)
+                                      Output:["_col0","_col1","_col2","_col3"]
+                                      Group By Operator [GBY_5] (rows=1 width=101)
+                                        Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
+                                      <-Map 1 [SIMPLE_EDGE] llap
+                                        SHUFFLE [RS_4]
+                                          PartitionCols:_col0, _col1, _col2
+                                          Group By Operator [GBY_3] (rows=1 width=101)
+                                            Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
+                                            Filter Operator [FIL_42] (rows=1 width=93)
+                                              predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null)
+                                              TableScan [TS_0] (rows=20 width=88)
+                                                default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
+                            <-Reducer 8 [SIMPLE_EDGE] llap
+                              SHUFFLE [RS_20]
+                                PartitionCols:_col0
+                                Select Operator [SEL_17] (rows=1 width=89)
+                                  Output:["_col0","_col1"]
+                                  Group By Operator [GBY_16] (rows=1 width=93)
+                                    Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2
+                                  <-Map 7 [SIMPLE_EDGE] llap
+                                    SHUFFLE [RS_15]
+                                      PartitionCols:_col0, _col1, _col2
+                                      Group By Operator [GBY_14] (rows=1 width=93)
+                                        Output:["_col0","_col1","_col2"],keys:key, c_int, c_float
+                                        Filter Operator [FIL_43] (rows=1 width=93)
+                                          predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null)
+                                          TableScan [TS_11] (rows=20 width=88)
+                                            default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]

PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int
>=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int PREHOOK: type: QUERY @@ -616,86 +628,96 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 llap - File Output Operator [FS_37] - Select Operator [SEL_36] (rows=1 width=20) + Reducer 7 llap + File Output Operator [FS_41] + Select Operator [SEL_40] (rows=1 width=20) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_35] - Group By Operator [GBY_33] (rows=1 width=20) + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] + Group By Operator [GBY_37] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_36] PartitionCols:_col0, _col1 - Group By Operator [GBY_31] (rows=1 width=20) + Group By Operator [GBY_35] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col6, _col2 - Select Operator [SEL_30] (rows=1 width=20) + Select Operator [SEL_34] (rows=1 width=16) Output:["_col6","_col2"] - Filter Operator [FIL_29] (rows=1 width=20) - predicate:(((_col1 + _col4) >= 0) and ((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0)) - Merge Join Operator [MERGEJOIN_42] (rows=4 width=20) - Conds:RS_25._col0=RS_26._col0(Outer),RS_25._col0=RS_27._col0(Right Outer),Output:["_col1","_col2","_col4","_col6"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] + Filter Operator [FIL_33] (rows=1 width=16) + predicate:(((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0)) + Merge Join Operator [MERGEJOIN_51] (rows=3 width=16) + Conds:RS_30._col0=RS_31._col0(Inner),Output:["_col1","_col2","_col6"] + <-Map 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_31] PartitionCols:_col0 - Select Operator [SEL_24] (rows=20 width=84) + Select Operator [SEL_29] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_41] (rows=20 width=84) - predicate:(c_int > 0) - TableScan [TS_22] (rows=20 width=84) + Filter Operator [FIL_49] (rows=18 width=84) + predicate:((c_int > 0) and key is not null) + TableScan [TS_27] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) + Select Operator [SEL_26] (rows=1 width=101) Output:["_col0","_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - Select Operator [SEL_6] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_5] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) - 
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=1 width=89) - Output:["_col0","_col1"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - Select Operator [SEL_17] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_16] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) - TableScan [TS_11] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Filter Operator [FIL_25] (rows=1 width=101) + predicate:((_col1 + _col4) >= 0) + Merge Join Operator [MERGEJOIN_50] (rows=1 width=101) + Conds:RS_22._col0=RS_23._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=1 width=89) + Output:["_col0","_col1"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_19] + Select Operator [SEL_17] (rows=1 width=105) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_16] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_14] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_48] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_11] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + Select Operator [SEL_6] (rows=1 width=105) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_5] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_3] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_47] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + 
c_float) >= 0) and key is not null) + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -762,7 +784,7 @@ Stage-0 Group By Operator [GBY_3] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_39] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and key is not null) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [SIMPLE_EDGE] llap @@ -778,7 +800,7 @@ Stage-0 Group By Operator [GBY_11] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_40] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and key is not null) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) TableScan [TS_8] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1164,34 +1186,34 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_19] - Select Operator [SEL_18] (rows=36 width=101) + Select Operator [SEL_18] (rows=72 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_17] (rows=36 width=101) + Filter Operator [FIL_17] (rows=72 width=101) predicate:((_col1 > 0) or (_col6 >= 0)) - Merge Join Operator [MERGEJOIN_28] (rows=36 width=101) + Merge Join Operator [MERGEJOIN_29] (rows=72 width=101) Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 Select Operator [SEL_13] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=18 width=84) + Filter Operator [FIL_27] (rows=18 width=84) predicate:key is not null TableScan [TS_11] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0 - Filter Operator [FIL_9] (rows=10 width=182) - predicate:(((_col1 + _col4) = 2) and ((_col4 + 1) = 2)) - Merge Join Operator [MERGEJOIN_27] (rows=40 width=182) - Conds:RS_6._col0=RS_7._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_9] 
(rows=20 width=182) + predicate:((_col1 + _col4) = 2) + Merge Join Operator [MERGEJOIN_28] (rows=40 width=182) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 Select Operator [SEL_2] (rows=9 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_24] (rows=9 width=93) + Filter Operator [FIL_25] (rows=9 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1200,7 +1222,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=9 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_25] (rows=9 width=93) + Filter Operator [FIL_26] (rows=9 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1212,45 +1234,55 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=50 width=101) + Reducer 3 llap + File Output Operator [FS_19] + Select Operator [SEL_18] (rows=72 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_12] (rows=50 width=101) - predicate:(((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0)) and ((_col4 + 1) = 2)) - Merge Join Operator [MERGEJOIN_19] (rows=200 width=101) - Conds:RS_8._col0=RS_9._col0(Right Outer),RS_8._col0=RS_10._col0(Right Outer),Output:["_col1","_col2","_col3","_col4","_col6"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_17] (rows=10 width=93) - predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0))) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + Filter Operator [FIL_17] (rows=72 width=101) + predicate:((_col1 > 0) or (_col6 >= 0)) + Merge Join Operator [MERGEJOIN_29] (rows=72 width=101) + Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_5] (rows=10 width=89) + Select Operator [SEL_13] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_18] (rows=10 width=93) - predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0))) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] + Filter Operator [FIL_27] (rows=18 width=84) + predicate:key is not null + TableScan [TS_11] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col0 - Select Operator [SEL_7] (rows=20 width=84) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + Filter Operator [FIL_9] (rows=20 width=182) + 
predicate:((_col1 + _col4) = 2) + Merge Join Operator [MERGEJOIN_28] (rows=40 width=182) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=9 width=93) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_25] (rows=9 width=93) + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=9 width=89) + Output:["_col0","_col1"] + Filter Operator [FIL_26] (rows=9 width=93) + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key order by x limit 1 PREHOOK: type: QUERY @@ -1680,7 +1712,7 @@ Stage-0 Group By Operator [GBY_3] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_48] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0)) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0)) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 9 [SIMPLE_EDGE] llap @@ -1702,7 +1734,7 @@ Stage-0 Group By Operator [GBY_15] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_49] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (UDFToDouble(key) > 0.0)) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and (UDFToDouble(key) > 0.0)) TableScan [TS_12] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -2432,10 +2464,10 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] llap SHUFFLE [RS_22] PartitionCols:_col0, _col1 - Select Operator [SEL_17] (rows=1 width=223) + Select Operator [SEL_17] (rows=2 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_16] (rows=1 width=219) - predicate:(_col0 = _col0) + Filter Operator [FIL_16] (rows=2 width=219) + predicate:_col0 is not null Group By Operator [GBY_14] (rows=2 width=219) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 6 [SIMPLE_EDGE] llap @@ -2470,41 +2502,40 @@ Plan optimized by CBO. 
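[Note on the join-type hunks above: changes such as "Conds:...(Left Outer)" -> "(Inner)" reflect the newer Calcite strengthening an outer join to an inner join when a filter above it is null-rejecting on the null-generating side: a row padded with NULLs from that side could never pass the filter, so the padding is unobservable. Below is a minimal sketch of that null-rejection test under a toy row model; the types and names here are illustrative and are not Calcite's actual join-simplification code.]

import java.util.HashMap;
import java.util.Map;
import java.util.function.Predicate;

public class OuterToInnerSketch {
  // A row is modeled as column name -> value, with Java null standing in
  // for SQL NULL.
  static boolean nullRejecting(Predicate<Map<String, Object>> filter,
                               String... nullSideColumns) {
    // Probe the filter with a row where every column from the
    // null-generating side is NULL. If the filter cannot pass, the outer
    // join's padded rows are unobservable and the join is effectively inner.
    Map<String, Object> probe = new HashMap<>();
    for (String c : nullSideColumns) {
      probe.put(c, null);
    }
    return !filter.test(probe);
  }

  public static void main(String[] args) {
    // Filter "(_col1 + _col4) >= 0": arithmetic on NULL yields NULL, and a
    // WHERE clause treats NULL as FALSE, so the padded row is rejected.
    Predicate<Map<String, Object>> filter = row -> {
      Object v = row.get("_col4");
      if (v == null) {
        return false; // (x + NULL) >= 0 is UNKNOWN, i.e. filtered out
      }
      return ((Integer) v) >= 0; // simplified; _col1 elided for brevity
    };
    System.out.println(nullRejecting(filter, "_col4"));
    // prints: true -> LEFT OUTER can be rewritten to INNER, as in the plans
  }
}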
Vertex dependency in root stage Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_36] - Select Operator [SEL_35] (rows=26 width=125) + File Output Operator [FS_31] + Select Operator [SEL_30] (rows=26 width=125) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_34] - Select Operator [SEL_33] (rows=26 width=125) + SHUFFLE [RS_29] + Select Operator [SEL_28] (rows=26 width=125) Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=26 width=145) + Filter Operator [FIL_27] (rows=26 width=145) predicate:((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) - Merge Join Operator [MERGEJOIN_42] (rows=26 width=145) - Conds:RS_29.UDFToDouble(_col1)=RS_30._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_37] (rows=26 width=145) + Conds:RS_24.UDFToDouble(_col1)=RS_25._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_29] + SHUFFLE [RS_24] PartitionCols:UDFToDouble(_col1) - Merge Join Operator [MERGEJOIN_41] (rows=26 width=141) + Merge Join Operator [MERGEJOIN_36] (rows=26 width=141) Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_26] + PARTITION_ONLY_SHUFFLE [RS_21] Select Operator [SEL_1] (rows=26 width=125) Output:["_col0","_col1"] TableScan [TS_0] (rows=26 width=125) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_size"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_27] + PARTITION_ONLY_SHUFFLE [RS_22] Group By Operator [GBY_12] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["count()","count(_col0)"] Group By Operator [GBY_7] (rows=1 width=8) @@ -2513,32 +2544,25 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_6] Group By Operator [GBY_5] (rows=1 width=76) Output:["_col0"],aggregations:["avg(p_size)"] - Filter Operator [FIL_38] (rows=8 width=4) + Filter Operator [FIL_33] (rows=8 width=4) predicate:(p_size < 10) TableScan [TS_2] (rows=26 width=4) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_30] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_25] (rows=1 width=12) + Select Operator [SEL_20] (rows=1 width=12) Output:["_col0","_col1"] - Group By Operator [GBY_24] (rows=1 width=8) - Output:["_col0"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Group By Operator [GBY_22] (rows=1 width=8) - Output:["_col0"],keys:_col0 - Group By Operator [GBY_19] (rows=1 width=8) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_18] - Group By Operator [GBY_17] (rows=1 width=76) - Output:["_col0"],aggregations:["avg(p_size)"] - Filter Operator [FIL_40] (rows=8 width=4) - predicate:(p_size < 10) - TableScan [TS_14] (rows=26 width=4) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] + Group By Operator [GBY_19] (rows=1 width=8) + Output:["_col0"],aggregations:["avg(VALUE._col0)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + Group 
By Operator [GBY_17] (rows=1 width=76) + Output:["_col0"],aggregations:["avg(p_size)"] + Filter Operator [FIL_35] (rows=8 width=4) + predicate:(p_size < 10) + TableScan [TS_14] (rows=26 width=4) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] PREHOOK: query: explain select b.p_mfgr, min(p_retailprice) from part b @@ -2564,10 +2588,9 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 10 <- Map 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) @@ -2577,51 +2600,44 @@ Stage-0 limit:-1 Stage-1 Reducer 5 llap - File Output Operator [FS_42] - Select Operator [SEL_41] (rows=3 width=106) + File Output Operator [FS_37] + Select Operator [SEL_36] (rows=3 width=106) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_40] - Select Operator [SEL_39] (rows=3 width=106) + SHUFFLE [RS_35] + Select Operator [SEL_34] (rows=3 width=106) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=3 width=126) + Filter Operator [FIL_33] (rows=3 width=126) predicate:(not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) - Merge Join Operator [MERGEJOIN_49] (rows=5 width=126) - Conds:RS_35._col0, _col1=RS_36._col0, _col1(Left Outer),Output:["_col0","_col1","_col3","_col4","_col7"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_36] + Merge Join Operator [MERGEJOIN_44] (rows=5 width=126) + Conds:RS_30._col0, _col1=RS_31._col0, _col1(Left Outer),Output:["_col0","_col1","_col3","_col4","_col7"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_31] PartitionCols:_col0, _col1 - Select Operator [SEL_31] (rows=1 width=110) + Select Operator [SEL_26] (rows=1 width=110) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_30] (rows=1 width=106) - predicate:(_col0 = _col0) - Group By Operator [GBY_28] (rows=1 width=106) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] - PartitionCols:_col0, _col1 - Group By Operator [GBY_26] (rows=1 width=106) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_24] (rows=1 width=114) - Output:["_col0","_col1"] - Filter Operator [FIL_46] (rows=1 width=114) - predicate:((_col1 = _col1) and ((_col2 - _col1) > 600.0)) - Group By Operator [GBY_22] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 - <-Map 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] - PartitionCols:_col0 - Group By Operator [GBY_20] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr - TableScan [TS_18] (rows=26 width=106) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] + Filter Operator [FIL_25] (rows=1 width=110) + predicate:_col0 is not null + Select Operator [SEL_24] (rows=1 width=110) + Output:["_col0","_col1"] + Filter Operator [FIL_41] (rows=1 width=114) + predicate:((_col1 = _col1) and ((_col2 - _col1) > 600.0)) + Group By Operator [GBY_22] (rows=5 width=114) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + 
<-Map 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Group By Operator [GBY_20] (rows=5 width=114) + Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr + TableScan [TS_18] (rows=26 width=106) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_35] + SHUFFLE [RS_30] PartitionCols:_col0, _col1 - Merge Join Operator [MERGEJOIN_48] (rows=5 width=122) - Conds:RS_32._col1=RS_33._col0(Left Outer),Output:["_col0","_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_43] (rows=5 width=122) + Conds:RS_27._col1=RS_28._col0(Left Outer),Output:["_col0","_col1","_col3","_col4"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] + SHUFFLE [RS_27] PartitionCols:_col1 Group By Operator [GBY_4] (rows=5 width=106) Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 @@ -2635,7 +2651,7 @@ Stage-0 TableScan [TS_0] (rows=26 width=106) default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_33] + SHUFFLE [RS_28] PartitionCols:_col0 Group By Operator [GBY_16] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 @@ -2646,7 +2662,7 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col0)"],keys:_col1 Select Operator [SEL_12] (rows=1 width=114) Output:["_col1","_col0"] - Filter Operator [FIL_45] (rows=1 width=114) + Filter Operator [FIL_40] (rows=1 width=114) predicate:((_col1 = _col1) and ((_col2 - _col1) > 600.0)) Group By Operator [GBY_10] (rows=5 width=114) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out index 9804569..023627e 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out @@ -499,7 +499,7 @@ Stage-0 Reducer 2 llap File Output Operator [FS_8] Merge Join Operator [MERGEJOIN_9] (rows=150994944 width=431) - Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],residual filter predicates:{((_col2 = _col14) or _col1 BETWEEN 1 AND 10)} + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],residual filter predicates:{((_col2 = _col14) or UDFToInteger(_col1) BETWEEN 1 AND 10)} <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_4] Select Operator [SEL_1] (rows=12288 width=215) diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out index 2f53e60..25f0439 100644 --- a/ql/src/test/results/clientpositive/llap/lineage3.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -166,7 +166,7 @@ where key in (select key+18 from src1) order by key PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 
from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + 18.0) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} 146 val_146 273 val_273 PREHOOK: query: select * from src1 a @@ -297,7 +297,7 @@ PREHOOK: type: CREATEVIEW PREHOOK: Input: default@alltypesorc PREHOOK: Output: database:default PREHOOK: Output: default@dest_v3 -{"version":"1.0","engine":"tez","database":"default","hash":"a0c2481ce1c24895a43a950f93a10da7","queryText":"create view dest_v3 (a1, a2, a3, a4, a5, a6, a7) as\n select x.csmallint, x.cbigint bint1, x.ctinyint, c.cbigint bint2, x.cint, x.cfloat, c.cstring1\n from alltypesorc c\n join (\n select a.csmallint csmallint, a.ctinyint ctinyint, a.cstring2 cstring2,\n a.cint cint, a.cstring1 ctring1, b.cfloat cfloat, b.cbigint cbigint\n from ( select * from alltypesorc a where cboolean1=true ) a\n join alltypesorc b on (a.csmallint = b.cint)\n ) x on (x.ctinyint = c.cbigint)\n where x.csmallint=11\n and x.cint > 899\n and x.cfloat > 4.5\n and c.cstring1 < '7'\n and x.cint + x.cfloat + length(c.cstring1) < 1000","edges":[{"sources":[],"targets":[0],"expression":"11","edgeType":"PROJECTION"},{"sources":[7],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[8],"targets":[3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[11],"targets":[6],"edgeType":"PROJECTION"},{"sources":[11,7],"targets":[0,1,3,2,4,5,6],"expression":"((c.cstring1 < '7') and c.cbigint is not null)","edgeType":"PREDICATE"},{"sources":[7,8],"targets":[0,1,3,2,4,5,6],"expression":"(c.cbigint = UDFToLong(a.ctinyint))","edgeType":"PREDICATE"},{"sources":[10,9],"targets":[0,1,3,2,4,5,6],"expression":"((b.cfloat > 4.5) and (11 = b.cint))","edgeType":"PREDICATE"},{"sources":[12,13,9,8],"targets":[0,1,3,2,4,5,6],"expression":"(a.cboolean1 and (a.csmallint = 11) and (a.cint > 899) and a.ctinyint is not null)","edgeType":"PREDICATE"},{"sources":[9,10,11],"targets":[0,1,3,2,4,5,6],"expression":"(((UDFToFloat(a.cint) + b.cfloat) + UDFToFloat(length(c.cstring1))) < 
1000.0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_v3.csmallint"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_v3.bint1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_v3.bint2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_v3.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_v3.cint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_v3.cfloat"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_v3.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":10,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"},{"id":11,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":12,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":13,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"a0c2481ce1c24895a43a950f93a10da7","queryText":"create view dest_v3 (a1, a2, a3, a4, a5, a6, a7) as\n select x.csmallint, x.cbigint bint1, x.ctinyint, c.cbigint bint2, x.cint, x.cfloat, c.cstring1\n from alltypesorc c\n join (\n select a.csmallint csmallint, a.ctinyint ctinyint, a.cstring2 cstring2,\n a.cint cint, a.cstring1 ctring1, b.cfloat cfloat, b.cbigint cbigint\n from ( select * from alltypesorc a where cboolean1=true ) a\n join alltypesorc b on (a.csmallint = b.cint)\n ) x on (x.ctinyint = c.cbigint)\n where x.csmallint=11\n and x.cint > 899\n and x.cfloat > 4.5\n and c.cstring1 < '7'\n and x.cint + x.cfloat + length(c.cstring1) < 1000","edges":[{"sources":[],"targets":[0],"expression":"11","edgeType":"PROJECTION"},{"sources":[7],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[8],"targets":[3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[11],"targets":[6],"edgeType":"PROJECTION"},{"sources":[11,7],"targets":[0,1,3,2,4,5,6],"expression":"((c.cstring1 < '7') and c.cbigint is not null)","edgeType":"PREDICATE"},{"sources":[7,8],"targets":[0,1,3,2,4,5,6],"expression":"(c.cbigint = UDFToLong(a.ctinyint))","edgeType":"PREDICATE"},{"sources":[10,9],"targets":[0,1,3,2,4,5,6],"expression":"((b.cfloat > 4.5) and (11 = b.cint))","edgeType":"PREDICATE"},{"sources":[12,13,9,8],"targets":[0,1,3,2,4,5,6],"expression":"(a.cboolean1 and (a.csmallint = 11) and (a.cint > 899) and a.ctinyint is not null)","edgeType":"PREDICATE"},{"sources":[9,10,11],"targets":[0,1,3,2,4,5,6],"expression":"(((UDFToFloat(a.cint) + b.cfloat) + UDFToFloat(length(c.cstring1))) < 
1000)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_v3.csmallint"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_v3.bint1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_v3.bint2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_v3.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_v3.cint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_v3.cfloat"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_v3.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":10,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"},{"id":11,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":12,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":13,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"}]} PREHOOK: query: alter view dest_v3 as select * from ( select sum(a.ctinyint) over (partition by a.csmallint order by a.csmallint) a, diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index 4bba265..0fd9ec6 100644 --- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -788,7 +788,7 @@ STAGE PLANS: alias: orc_pred Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) + predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) @@ -864,10 +864,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) + filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) + predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index 01348a6..11e108f 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -726,7 +726,7 @@ STAGE PLANS: alias: tbl_pred Statistics: Num rows: 1049 Data size: 11539 Basic 
stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) + predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) @@ -802,10 +802,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) + filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) + predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out index 08a9bd1..834a251 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out @@ -559,23 +559,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string), _col1 (type: string) + aggregations: count(_col1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -3094,23 +3089,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string), _col1 (type: string) + aggregations: count(_col1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: 
Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 9e0a1d7..3004e36 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -255,22 +255,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (value = value) (type: boolean) - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 9737 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 107 Data size: 9737 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -283,10 +283,10 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -948,7 +948,7 @@ POSTHOOK: query: insert into t values(0) POSTHOOK: type: QUERY POSTHOOK: Output: default@t POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -Warning: Shuffle Join MERGEJOIN[21][tables = 
[$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from t where exists (select count(*) from src where 1=2) PREHOOK: type: QUERY POSTHOOK: query: explain select * from t where exists (select count(*) from src where 1=2) @@ -963,7 +963,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -997,9 +997,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: true (type: boolean) - sort order: + - Map-reduce partition columns: true (type: boolean) + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap @@ -1014,10 +1012,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1032,16 +1030,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: true (type: boolean) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1049,7 +1040,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from t where exists (select count(*) from src where 1=2) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 1cbbe8f..f9cbd4d 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -308,7 +308,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) @@ -376,16 +376,11 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - mode: hash - 
outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -440,10 +435,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -557,24 +551,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 @@ -642,8 +618,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -670,16 +645,20 @@ STAGE PLANS: Filter Operator predicate: ((value = value) and (key > '9')) (type: boolean) Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -700,24 +679,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -979,10 +940,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1030,19 +990,19 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (l_partkey = l_partkey) (type: boolean) - Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(l_quantity) keys: l_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -1106,30 +1066,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: double), _col0 (type: int) - mode: hash + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: int) - Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 
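The other recurring rewrite in these files, (col = col) becoming col is not null, simplifies a degenerate self-comparison seemingly left behind by decorrelating predicates such as pp.p_type = p.p_type. Under SQL three-valued logic, x = x evaluates to TRUE for non-null x and to NULL (hence filtered out) when x is null, so both predicates keep exactly the same rows; the IS NOT NULL form merely stops the stats annotator from charging equality selectivity, which is why the post-filter estimates double (lineitem above: 50 rows back up to 100). The equivalence, stated on the schema these plans use:

    -- identical result sets; only the optimizer's cardinality estimate differs
    select l_partkey, avg(l_quantity) from lineitem where l_partkey = l_partkey group by l_partkey;
    select l_partkey, avg(l_quantity) from lineitem where l_partkey is not null group by l_partkey;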
Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: int) - Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1191,18 +1137,18 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1230,21 +1176,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - mode: complete + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1601,22 +1542,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_name = p_name) (type: boolean) - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_partkey (type: int) outputColumnNames: p_name, p_partkey - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_name (type: string), p_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1660,16 +1601,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2160,22 +2101,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_brand = p_brand) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_brand is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) outputColumnNames: p_type, p_brand - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2203,12 +2144,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort 
order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2694,18 +2635,18 @@ STAGE PLANS: alias: sc Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key = key) (type: boolean) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -2742,7 +2683,7 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col1 (type: string), _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) @@ -2798,16 +2739,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -3496,17 +3437,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -3538,21 +3479,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3636,17 +3577,17 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 2977 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2977 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 13 Data size: 2977 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -3656,17 +3597,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce 
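The statistics churn through the rest of these part hunks follows mechanically from that filter change: the doubled filter output carries through each downstream operator. Tracing the estimates visible in one branch:

    old: 26 rows -> (p_type = p_type)  -> 13 -> map-side Group By -> 6  ; join emits 12 -> dedup Group By -> 6
    new: 26 rows -> p_type IS NOT NULL -> 26 -> map-side Group By -> 13 ; join emits 24 -> dedup Group By -> 12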
partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -3698,21 +3639,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col4 - Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3798,17 +3739,17 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -3818,17 +3759,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -3860,21 +3801,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 12 Data size: 2748 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 5496 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 2748 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 5496 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1374 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2748 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 6 Data size: 1374 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2748 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3944,17 +3885,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -3986,21 +3927,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 
(type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -4058,10 +3999,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4087,18 +4027,18 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -4107,19 +4047,19 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -4172,18 +4112,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 
6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 7 Execution mode: llap @@ -4193,34 +4133,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: bigint) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -4252,9 +4174,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4280,19 +4201,19 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_partkey = p_partkey) (type: boolean) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(p_size) keys: p_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -4322,34 +4243,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - 
keys: _col0 (type: int), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: double) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: double) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -4391,7 +4294,7 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) @@ -4405,11 +4308,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4444,7 +4346,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -4518,34 +4420,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort 
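The same redundant-distinct removal renumbers this whole DAG: the Reducer 5 -> Reducer 6 dedup chain collapses into Reducer 5 alone, Reducer 8 becomes Reducer 7, Map 7 becomes Map 6, and the cross-product warning id drops from MERGEJOIN[35] to MERGEJOIN[30], consistent with fewer operators being created. The affected query is quoted in the file; its non-equi correlation is what forces the shuffle-join cross product, since a > predicate cannot serve as a hash join key:

    -- the > correlation cannot be used as an equi-join key, hence the cross product warning
    select * from part
    where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey);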
order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4564,7 +4448,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4590,10 +4474,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 10 <- Map 9 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -4621,18 +4504,18 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -4641,19 +4524,19 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() 
keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -4663,19 +4546,19 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -4687,41 +4570,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 11 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
13 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: bigint) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 2 Execution mode: llap @@ -4796,18 +4661,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 8 Execution mode: llap @@ -4817,18 +4682,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 @@ -4882,10 +4747,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 10 <- Map 9 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -4913,18 +4777,18 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_partkey = p_partkey) (type: boolean) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_partkey (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 52 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -4933,19 +4797,19 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_partkey = p_partkey) (type: boolean) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(p_size) keys: p_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -4955,19 +4819,19 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_partkey = p_partkey) (type: boolean) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(p_size) keys: p_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -4979,41 +4843,23 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 11 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE 
Column stats: COMPLETE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), _col1 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: double) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: double) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 2 Execution mode: llap @@ -5088,18 +4934,18 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 8 Execution mode: llap @@ -5109,18 +4955,18 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 @@ -5227,7 +5073,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 UDFToLong(_col0) (type: bigint) 1 _col0 (type: bigint) @@ -5251,16 +5097,11 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE 
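The count()/count(_col1) aggregate pairs and the projected true (type: boolean) marker column in these reducers are the machinery for null-aware IN/NOT IN: the total row count, the non-null count of the subquery column, and an is-matched flag from the outer join feed the CASE WHEN filters further down in these files, which must distinguish an empty subquery, a genuine match, and a miss in the presence of nulls. A standalone sketch of what the two counts disambiguate:

    -- total = non_null : the subquery column has no nulls, so IN is plain TRUE/FALSE
    -- non_null < total : a null exists, so a non-match must evaluate to NULL, not FALSE
    select count(*) as total, count(p_size) as non_null from part;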
- Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -5393,9 +5234,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5421,7 +5261,7 @@ STAGE PLANS: alias: tt Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (j = j) (type: boolean) + predicate: j is not null (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: j (type: int), i (type: int) @@ -5468,9 +5308,8 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint), _col0 (type: int) - mode: hash + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -5478,19 +5317,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index d2016b3..9e6b9ad 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -1733,22 +1733,22 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) outputColumnNames: p_type, p_brand - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 12 @@ -1757,17 +1757,17 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1777,22 +1777,22 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) outputColumnNames: p_type, p_brand - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -1801,22 +1801,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE 
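subquery_multi.q.out exercises several IN/NOT IN subqueries in a single statement, so the same IS NOT NULL rewrite repeats once per decorrelated branch (the part and pp scans in Maps 5, 7, and 12 here), and the doubled inputs inflate the chained join estimates further down (Reducers 2 through 4 go from 6 rows to 14, 15, and 16). A hypothetical statement of that shape, for orientation only; the real ones live in subquery_multi.q:

    select * from part
    where p_name in (select p_name from part)
      and p_container not in (select p_container from part where p_type = 'x');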
Select Operator expressions: p_type (type: string), p_name (type: string) outputColumnNames: p_type, p_name - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -1825,17 +1825,17 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1846,16 +1846,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reducer 13 Execution mode: llap Reduce Operator Tree: @@ -1890,7 +1890,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true 
(type: boolean) @@ -1909,16 +1909,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1929,12 +1929,12 @@ STAGE PLANS: 0 _col4 (type: string), _col1 (type: string) 1 _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string) sort order: + Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -1946,12 +1946,12 @@ STAGE PLANS: 0 _col4 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 - Statistics: Num rows: 6 Data size: 1633 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: string) sort order: ++ Map-reduce partition columns: _col3 (type: string), _col4 (type: string) - Statistics: Num rows: 6 Data size: 1633 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) Reducer 4 Execution mode: llap @@ -1963,17 +1963,17 @@ STAGE PLANS: 0 _col3 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 - Statistics: Num rows: 6 Data size: 1796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3891 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 3 Data size: 898 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 898 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 898 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1985,16 +1985,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -2144,22 +2144,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2301,22 +2301,22 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column 
stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) outputColumnNames: p_type, p_brand - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 12 @@ -2325,17 +2325,17 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2345,22 +2345,22 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) outputColumnNames: p_type, p_brand - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -2393,17 +2393,17 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2414,16 +2414,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reducer 13 Execution mode: llap Reduce Operator Tree: @@ -2458,7 +2458,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) @@ -2477,16 +2477,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2688,18 +2688,18 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_name (type: string), p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -2708,23 +2708,23 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_size = p_size) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_size (type: int), p_type (type: string) outputColumnNames: p_size, p_type - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(p_type) keys: p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2734,18 +2734,18 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_size = p_size) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - 
Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -2755,19 +2755,19 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 2 Execution mode: llap @@ -2779,12 +2779,12 @@ STAGE PLANS: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int) sort order: + Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -2796,12 +2796,12 @@ STAGE PLANS: 0 _col5 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 - Statistics: Num rows: 6 Data size: 1633 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string), _col5 (type: int) sort order: ++ Map-reduce partition columns: _col3 (type: string), _col5 (type: int) - Statistics: Num rows: 6 Data size: 1633 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) Reducer 4 Execution mode: llap @@ -2813,17 +2813,17 @@ STAGE PLANS: 0 _col3 (type: string), _col5 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, 
_col13, _col16 - Statistics: Num rows: 6 Data size: 1796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3891 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 3 Data size: 898 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 898 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 898 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2835,12 +2835,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -2849,12 +2849,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 @@ -2940,17 +2940,17 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2960,22 +2960,22 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) outputColumnNames: p_type, p_brand - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3028,16 +3028,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3081,7 +3081,7 @@ POSTHOOK: Input: default@part_null 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -3103,12 +3103,11 @@ STAGE PLANS: Edges: Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Reducer 10 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Reducer 13 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 8 <- Reducer 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3234,24 +3233,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 13 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), true (type: boolean) outputColumnNames: _col0, _col1 @@ -3366,7 +3347,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product PREHOOK: query: select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -3437,18 +3418,18 @@ STAGE PLANS: alias: s2 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (value = value) (type: boolean) - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 9737 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 107 Data size: 9737 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE 
Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -3457,18 +3438,18 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (value = value) (type: boolean) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -3498,12 +3479,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 9737 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 107 Data size: 9737 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3514,7 +3495,7 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) @@ -3570,12 +3551,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index f1efb64..b947111 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -541,17 +541,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 1 Data size: 219 Basic stats: COMPLETE 
Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -598,7 +598,7 @@ Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select p_name, p_size from @@ -626,9 +626,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) @@ -676,24 +675,6 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -849,16 +830,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator 
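
A second recurring change in these plans is the removal of back-to-back distinct aggregations: vertices such as Reducer 5, Reducer 10, Reducer 13, and Reducer 14 each re-grouped keys that the preceding Group By had already made distinct, so they are dropped and the edges rewired (for example Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) above). The renumbered cross-product warnings, MERGEJOIN[66] becoming MERGEJOIN[61] and MERGEJOIN[49] becoming MERGEJOIN[44], follow mechanically from the deleted operators; the join itself is unchanged. A small sketch of why the second grouping is a no-op (plain Java collections, nothing Hive-specific):

```java
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/** Illustrative: a distinct over an already-distinct input is the identity. */
public final class DistinctIdempotence {
  static <K> Set<K> distinct(Iterable<K> rows) {
    Set<K> out = new LinkedHashSet<>();
    rows.forEach(out::add);
    return out;
  }

  public static void main(String[] args) {
    List<String> rows = List.of("Brand#41", "Brand#52", "Brand#41");
    Set<String> once = distinct(rows);
    Set<String> twice = distinct(once);     // the second Group By the old plans ran
    System.out.println(once.equals(twice)); // true: the extra reducer did no work
  }
}
```

Pruning the redundant vertex also removes a full shuffle stage, which is the practical payoff visible in the shorter Edges lists.
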
@@ -866,7 +847,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_name, p_size from part where part.p_size not in @@ -941,10 +922,9 @@ STAGE PLANS: Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 12 <- Map 11 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (SIMPLE_EDGE) - Reducer 14 <- Reducer 13 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) @@ -1092,31 +1072,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 14 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 @@ -1779,23 +1741,23 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), (p_size * p_size) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) 
sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1805,22 +1767,22 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (p_size * p_size) (type: int), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col1 (type: string), _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1873,12 +1835,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 7 Execution mode: llap @@ -1887,23 +1849,23 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -2055,7 +2017,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col9 = 0) or (_col12 is null and (_col5 - 1) is not null and (_col10 >= _col9))) (type: boolean) + predicate: ((_col9 = 0) or (_col12 is null and _col5 is not null and (_col10 >= _col9))) (type: boolean) Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -2287,7 +2249,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col9 = 0) or (_col12 is null and (_col0 * _col5) is not null and (_col10 >= _col9))) (type: boolean) + predicate: ((_col9 = 0) or (_col12 is null and _col0 is not null and _col5 is not null and (_col10 >= _col9))) (type: boolean) Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -2478,23 +2440,23 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_name = p_name) (type: boolean) - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_partkey (type: int) outputColumnNames: p_name, p_partkey - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(p_partkey) keys: p_name (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1781 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1781 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2504,18 +2466,18 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE 
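
The NOT IN plans above also simplify IS NOT NULL over arithmetic, as in the two Filter Operator hunks with the CASE predicates: because - and * are strict (they yield NULL exactly when an operand is NULL), (_col5 - 1) is not null reduces to _col5 is not null, and (_col0 * _col5) is not null to _col0 is not null and _col5 is not null, the literal 1 dropping out since it can never be null. A minimal sketch of that strictness rule (helper names are hypothetical, not Calcite's API):

```java
/** Illustrative: for a strict operator, "f(a, b) IS NOT NULL" == "a IS NOT NULL AND b IS NOT NULL". */
public final class StrictNullPropagation {
  /** SQL-style strict subtraction: NULL if either operand is NULL. */
  static Integer minus(Integer a, Integer b) {
    return (a == null || b == null) ? null : a - b;
  }

  public static void main(String[] args) {
    Integer[][] cases = { {7, 1}, {null, 1}, {7, null} };
    for (Integer[] c : cases) {
      boolean lhs = minus(c[0], c[1]) != null;    // (a - b) IS NOT NULL
      boolean rhs = c[0] != null && c[1] != null; // a IS NOT NULL AND b IS NOT NULL
      System.out.println(lhs == rhs);             // always true for strict operators
    }
  }
}
```

The surviving predicates therefore reference only bare columns, which feeds the same null-count-based statistics estimation described earlier.
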
Filter Operator - predicate: (p_name = p_name) (type: boolean) - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_partkey (type: int), p_name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 11 @@ -2597,12 +2559,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1781 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1781 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -2611,16 +2573,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) Reducer 9 Execution mode: llap @@ -2632,12 +2594,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 (_col0 + 100) (type: int) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col3 (type: int) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -3010,7 +2972,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) 
(type: boolean) + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), true (type: boolean) @@ -3323,17 +3285,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Stage: Stage-0 @@ -3392,23 +3354,23 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_brand = p_brand) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_brand is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), (UDFToDouble(p_type) + 2.0) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3418,22 +3380,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_brand = p_brand) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_brand is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(p_type) + 2.0) (type: double), p_brand (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 
2600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: double), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3486,12 +3448,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 7 Execution mode: llap @@ -3500,19 +3462,19 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 3 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -3675,17 +3637,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 4 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort 
order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 13 Execution mode: llap @@ -4196,23 +4158,23 @@ STAGE PLANS: alias: sc Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key = key) (type: boolean) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), concat('v', value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 103 Data size: 10609 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 103 Data size: 10609 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -4222,22 +4184,22 @@ STAGE PLANS: alias: sc Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key = key) (type: boolean) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: concat('v', value) (type: string), key (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col1 (type: string), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 33875 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 125 Data size: 33875 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 11 @@ -4310,12 +4272,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 2470 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: 
_col0 (type: string) - Statistics: Num rows: 26 Data size: 2470 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -4325,15 +4287,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 2470 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2470 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + Map-reduce partition columns: _col1 (type: bigint) - Statistics: Num rows: 26 Data size: 2470 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 5 Execution mode: llap @@ -4361,12 +4323,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 103 Data size: 10609 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 103 Data size: 10609 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 9 Execution mode: llap @@ -4375,23 +4337,23 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 33875 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 34375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 62 Data size: 17050 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 62 Data size: 17050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 62 Data size: 17050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -4570,7 +4532,7 @@ STAGE PLANS: outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col9 = 0) or (_col12 is null and (_col5 - 1) is not null and (_col10 >= _col9))) (type: boolean) + predicate: ((_col9 = 0) or (_col12 is null and _col5 is not null and (_col10 >= _col9))) (type: boolean) Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -4815,7 +4777,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col9 = 0) or (_col12 is null and (_col5 - 1) is not null and (_col10 >= _col9))) (type: boolean) + predicate: ((_col9 = 0) or (_col12 is null and _col5 is not null and (_col10 >= _col9))) (type: boolean) Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -5698,19 +5660,19 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_brand = p_brand) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_brand is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(p_type) keys: p_brand (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5720,18 +5682,18 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_brand = p_brand) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_brand is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_brand (type: string), p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1176 Basic 
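[Two hunks above simplify "(_col5 - 1) is not null" to "_col5 is not null" inside the OR predicate. Integer subtraction in SQL propagates nulls, so "(_col5 - 1)" is null exactly when "_col5" is null, and the subtraction adds nothing to the null check. A minimal sketch of that equivalence, assuming illustrative names (StrictArithmeticNullability and minusOne are not Hive or Calcite APIs):

public class StrictArithmeticNullability {
  // SQL-style strict subtraction: null in, null out.
  static Integer minusOne(Integer x) {
    return (x == null) ? null : x - 1;
  }

  public static void main(String[] args) {
    for (Integer x : new Integer[] {5, null}) {
      boolean viaSubtraction = minusOne(x) != null; // (x - 1) IS NOT NULL
      boolean direct = x != null;                   // x IS NOT NULL
      // The two checks agree for every input, null or not.
      System.out.println(x + ": " + viaSubtraction + " == " + direct);
    }
  }
}
]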
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 11 @@ -5813,12 +5775,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -5827,16 +5789,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) Reducer 9 Execution mode: llap @@ -5848,12 +5810,12 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble((_col0 + 100)) (type: double) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col3 (type: int) - Statistics: Num rows: 5 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -6202,7 +6164,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 = _col0) (type: boolean) + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: char(100)), true (type: boolean) @@ -6431,23 +6393,23 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (b = b) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: b is not null (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b (type: int), a (type: int) outputColumnNames: b, a - Statistics: Num rows: 
1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(a) keys: b (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -6457,22 +6419,22 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (b = b) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: b is not null (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b (type: int), a (type: int) outputColumnNames: b, a - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: b (type: int), a (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -6545,7 +6507,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 = _col0) (type: boolean) + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) @@ -6665,23 +6627,23 @@ STAGE PLANS: alias: t7 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (j = j) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: j is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: j (type: int), i (type: int) outputColumnNames: j, i - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -6691,18 +6653,18 @@ STAGE PLANS: alias: t7 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (j = j) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: j is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: i (type: int), j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 8 @@ -6905,23 +6867,23 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (j = j) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: j is not null (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: j (type: int), i (type: int) outputColumnNames: j, i - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -6931,18 +6893,18 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (j = j) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: j is not null (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: i (type: int), j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 8 @@ -7112,23 +7074,23 @@ STAGE PLANS: alias: t1 
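[The change repeated throughout these hunks — "(j = j)" becoming "j is not null", with the filtered row estimates growing from roughly half the input back to the full input — follows from SQL three-valued logic: a self-comparison evaluates to UNKNOWN rather than TRUE when the column is null, so as a WHERE predicate it admits exactly the non-null rows, and the planner can substitute the cheaper IS NOT NULL form (whose selectivity estimate is driven by the null fraction rather than the generic equality guess, as the before/after statistics show). A minimal sketch of why the two predicates accept the same rows, assuming illustrative names (SelfEqualitySimplification and sqlEquals are not Hive or Calcite APIs):

import java.util.Arrays;
import java.util.List;

public class SelfEqualitySimplification {
  // SQL equality: UNKNOWN (modeled here as null) if either side is null.
  static Boolean sqlEquals(Integer a, Integer b) {
    if (a == null || b == null) {
      return null;
    }
    return a.equals(b);
  }

  public static void main(String[] args) {
    List<Integer> column = Arrays.asList(1, null, 3);
    for (Integer v : column) {
      // A WHERE clause keeps a row only when the predicate is TRUE,
      // so `v = v` and `v IS NOT NULL` keep exactly the same rows.
      boolean keptByEquality = Boolean.TRUE.equals(sqlEquals(v, v));
      boolean keptByIsNotNull = (v != null);
      System.out.println(v + ": " + keptByEquality + " == " + keptByIsNotNull);
    }
  }
}
]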
Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (j = j) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: j is not null (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: j (type: int), i (type: int) outputColumnNames: j, i - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -7138,18 +7100,18 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (j = j) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: j is not null (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: i (type: int), j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -7218,7 +7180,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 = _col0) (type: boolean) + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) @@ -7733,17 +7695,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 828 Data size: 147384 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 828 Data size: 150696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1656 Data size: 301392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 828 Data size: 150696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1656 Data size: 301392 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 13 Execution mode: llap diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index da387d7..732d018 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -1121,7 +1121,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col12 Statistics: Num rows: 26 Data size: 16406 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col5 BETWEEN _col10 AND _col12 (type: boolean) + predicate: UDFToDouble(_col5) BETWEEN UDFToDouble(_col10) AND _col12 (type: boolean) Statistics: Num rows: 2 Data size: 1262 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -1709,23 +1709,23 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_name = p_name) (type: boolean) - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_partkey (type: int) outputColumnNames: p_name, p_partkey - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(p_partkey) keys: p_name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1777,16 +1777,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: 
COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: boolean) Stage: Stage-0 @@ -1844,19 +1844,19 @@ STAGE PLANS: alias: part_null Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -1893,16 +1893,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: double), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: boolean) Stage: Stage-0 @@ -1968,19 +1968,19 @@ STAGE PLANS: alias: part_null Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(p_size) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -2099,16 +2099,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), true (type: 
boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: boolean) Reducer 8 Execution mode: llap @@ -2222,19 +2222,19 @@ STAGE PLANS: alias: part_null Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(p_size) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -2335,16 +2335,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: boolean) Reducer 7 Execution mode: llap @@ -2510,18 +2510,18 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 6 Data 
size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2606,16 +2606,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3820,17 +3820,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -3869,22 +3869,22 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col0 (type: string) outputColumnNames: _col3, _col0 - Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col0) keys: _col3 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 5 Execution mode: llap @@ -3894,16 +3894,16 @@ STAGE PLANS: keys: KEY._col0 
(type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: boolean) Stage: Stage-0 @@ -4327,23 +4327,23 @@ STAGE PLANS: alias: pp Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_name (type: string) outputColumnNames: p_type, p_name - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(p_name) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -4355,16 +4355,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 4 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: boolean) Reducer 2 Execution mode: llap @@ -4417,17 +4417,17 @@ STAGE PLANS: 0 _col4 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14 - Statistics: Num rows: 7 Data size: 982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1790 Basic stats: COMPLETE Column 
stats: NONE Filter Operator predicate: (not (_col1 like CASE WHEN (_col14 is null) THEN (null) ELSE (_col13) END)) (type: boolean) - Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 895 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 895 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 895 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4967,18 +4967,18 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (l_partkey = l_partkey) (type: boolean) - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -4987,19 +4987,19 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (l_partkey = l_partkey) (type: boolean) - Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(l_quantity) keys: l_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -5054,25 +5054,25 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -5081,16 +5081,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Stage: Stage-0 @@ -5151,17 +5151,17 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -5171,23 +5171,23 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE 
Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) outputColumnNames: p_type, p_brand - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(p_brand) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -5201,10 +5201,10 @@ STAGE PLANS: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5219,24 +5219,24 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 13 Data size: 6565 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 13130 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END)) (type: boolean) - Statistics: Num rows: 7 Data size: 3535 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 6565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -5245,16 +5245,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 3744 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 6 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: boolean) Stage: Stage-0 @@ -5458,23 +5458,23 @@ STAGE PLANS: alias: depts Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (name = name) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + predicate: name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: name (type: string), deptno (type: int) outputColumnNames: name, deptno - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(deptno) keys: name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5584,19 +5584,19 @@ STAGE PLANS: alias: depts Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (deptno = deptno) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + predicate: deptno is not null (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(name) keys: deptno (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -5704,23 +5704,23 @@ STAGE PLANS: alias: depts Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (name = name) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + predicate: name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: name (type: string), deptno (type: int) outputColumnNames: name, deptno - Statistics: Num 
rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(deptno) keys: name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5730,19 +5730,19 @@ STAGE PLANS: alias: depts Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (deptno = deptno) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + predicate: deptno is not null (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(name) keys: deptno (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5899,23 +5899,23 @@ STAGE PLANS: alias: depts Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (name = name) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + predicate: name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: name (type: string), deptno (type: int) outputColumnNames: name, deptno - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(deptno) keys: name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs diff --git a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index fbb2676..4a8955d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -1274,8 +1274,8 @@ STAGE PLANS: Filter Vectorization: className: 
VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 2)(children: LongColAddLongScalar(col 0, val 1) -> 2:long) -> boolean - predicate: (key + 1) is not null (type: boolean) + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key + 1) (type: int) @@ -1317,8 +1317,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 2)(children: LongColAddLongScalar(col 0, val 1) -> 2:long) -> boolean - predicate: (key + 1) is not null (type: boolean) + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key + 1) (type: int) diff --git a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out index a58b1b2..1ff7dae 100644 --- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out @@ -115,13 +115,13 @@ STAGE PLANS: 1 Map 2 Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string) + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumns: [0, 2, 1, 3, 5] - selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> 5:string + selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3, col 3) -> 4:boolean) -> 5:string Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -280,8 +280,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> boolean - predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) + predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3, col 3) -> 4:boolean) -> boolean + predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean) Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index ee7b160..e078e85 100644 --- 
a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -208,8 +208,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean - predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean + predicate: (s is not null and dt is not null and ts is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) @@ -274,8 +274,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean - predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean + predicate: (s is not null and dt is not null and ts is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index b9b0691..9472387 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -30544,18 +30544,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean) + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data 
size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30572,7 +30572,7 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Path -> Alias: #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/louter_join_ppr.q.out index 2f6cdfd..a4de667 100644 --- a/ql/src/test/results/clientpositive/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/louter_join_ppr.q.out @@ -30,18 +30,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -51,18 +51,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -219,38 +219,34 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column 
stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -324,24 +320,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col1 (type: string) auto parallelism: false TableScan alias: b @@ -349,18 +345,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and 
(UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -509,153 +505,46 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - 
input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart Truncated Path -> Alias: /src [$hdt$_1:b] /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:a] /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:a] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:a] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:a] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -675,8 +564,6 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: FROM srcpart a @@ -690,8 +577,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 17 val_17 17 val_17 17 val_17 17 val_17 @@ -1031,18 +916,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -1052,18 +937,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -1220,42 +1105,38 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - 
Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/mergejoins.q.out b/ql/src/test/results/clientpositive/mergejoins.q.out index 1023f61..4a290a1 100644 --- a/ql/src/test/results/clientpositive/mergejoins.q.out +++ b/ql/src/test/results/clientpositive/mergejoins.q.out @@ -251,7 +251,7 @@ STAGE PLANS: Left Outer Join1 to 2 filter predicates: 0 - 1 {(UDFToDouble(KEY.reducesinkkey0) < UDFToDouble(10))} + 1 {(UDFToDouble(KEY.reducesinkkey0) < 10.0)} 2 keys: 0 _col0 (type: string) diff --git a/ql/src/test/results/clientpositive/outer_join_ppr.q.out b/ql/src/test/results/clientpositive/outer_join_ppr.q.out index 2bf6d61..09f30b8 100644 --- a/ql/src/test/results/clientpositive/outer_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/outer_join_ppr.q.out @@ -28,36 +28,44 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce 
Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false TableScan alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -203,149 +211,42 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, 
string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart Truncated Path -> Alias: /src [$hdt$_0:a] /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(VALUE._col1 = '2008-04-08')} + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0) and (UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here 
#### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -365,8 +266,6 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: FROM src a @@ -380,8 +279,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 17 val_17 17 val_17 17 val_17 17 val_17 @@ -427,18 +324,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -448,18 +345,18 @@ STAGE PLANS: GatherStats: 
false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -616,38 +513,34 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/query1.q.out b/ql/src/test/results/clientpositive/perf/query1.q.out index 09278e3..b8340fd 100644 --- 
a/ql/src/test/results/clientpositive/perf/query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/query1.q.out
@@ -77,30 +77,30 @@ Stage-0
         <-Reducer 13 [SIMPLE_EDGE]
           SHUFFLE [RS_46]
             PartitionCols:_col2
-            Select Operator [SEL_38] (rows=7918783 width=77)
+            Select Operator [SEL_38] (rows=15837566 width=77)
               Output:["_col0","_col1","_col2"]
-              Group By Operator [GBY_37] (rows=7918783 width=77)
+              Group By Operator [GBY_37] (rows=15837566 width=77)
                 Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1
-                Select Operator [SEL_33] (rows=15837566 width=77)
+                Select Operator [SEL_33] (rows=31675133 width=77)
                   Output:["_col1","_col2"]
-                  Group By Operator [GBY_32] (rows=15837566 width=77)
+                  Group By Operator [GBY_32] (rows=31675133 width=77)
                     Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
                   <-Reducer 12 [SIMPLE_EDGE]
                     SHUFFLE [RS_31]
                       PartitionCols:_col0
-                      Group By Operator [GBY_30] (rows=31675133 width=77)
+                      Group By Operator [GBY_30] (rows=63350266 width=77)
                         Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1
-                        Select Operator [SEL_29] (rows=31675133 width=77)
+                        Select Operator [SEL_29] (rows=63350266 width=77)
                           Output:["_col2","_col1","_col3"]
-                          Merge Join Operator [MERGEJOIN_77] (rows=31675133 width=77)
+                          Merge Join Operator [MERGEJOIN_77] (rows=63350266 width=77)
                             Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3"]
                           <-Map 11 [SIMPLE_EDGE]
                             SHUFFLE [RS_26]
                               PartitionCols:_col0
-                              Select Operator [SEL_22] (rows=28795575 width=77)
+                              Select Operator [SEL_22] (rows=57591150 width=77)
                                 Output:["_col0","_col1","_col2","_col3"]
-                                Filter Operator [FIL_72] (rows=28795575 width=77)
-                                  predicate:((sr_store_sk = sr_store_sk) and sr_returned_date_sk is not null)
+                                Filter Operator [FIL_72] (rows=57591150 width=77)
+                                  predicate:(sr_returned_date_sk is not null and sr_store_sk is not null)
                                   TableScan [TS_20] (rows=57591150 width=77)
                                     default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"]
                           <-Map 14 [SIMPLE_EDGE]
diff --git a/ql/src/test/results/clientpositive/perf/query15.q.out b/ql/src/test/results/clientpositive/perf/query15.q.out
index 4f4dcc5..a331cd7 100644
--- a/ql/src/test/results/clientpositive/perf/query15.q.out
+++ b/ql/src/test/results/clientpositive/perf/query15.q.out
@@ -5,83 +5,77 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
 Stage-0
   Fetch Operator
     limit:100
     Stage-1
-      Reducer 6
-      File Output Operator [FS_31]
-        Limit [LIM_30] (rows=100 width=135)
+      Reducer 5
+      File Output Operator [FS_30]
+        Limit [LIM_29] (rows=100 width=135)
           Number of rows:100
-          Select Operator [SEL_29] (rows=174233858 width=135)
+          Select Operator [SEL_28] (rows=174233858 width=135)
            Output:["_col0","_col1"]
-          <-Reducer 5 [SIMPLE_EDGE]
-            SHUFFLE [RS_28]
-              Group By Operator [GBY_26] (rows=174233858 width=135)
+          <-Reducer 4 [SIMPLE_EDGE]
+            SHUFFLE [RS_27]
+              Group By Operator [GBY_25] (rows=174233858 width=135)
                Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
-              <-Reducer 4 [SIMPLE_EDGE]
-                SHUFFLE [RS_25]
+              <-Reducer 3 [SIMPLE_EDGE]
+                SHUFFLE [RS_24]
                   PartitionCols:_col0
-                  Group By Operator [GBY_24] (rows=348467716 width=135)
-                    Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7
-                    Select Operator [SEL_23] (rows=348467716 width=135)
-                      Output:["_col7","_col2"]
-                      Merge Join Operator [MERGEJOIN_47] (rows=348467716 width=135)
-                        Conds:RS_20._col0=RS_21._col0(Inner),Output:["_col2","_col7"]
-                      <-Map 9 [SIMPLE_EDGE]
-                        SHUFFLE [RS_21]
-                          PartitionCols:_col0
-                          Select Operator [SEL_19] (rows=18262 width=1119)
-                            Output:["_col0"]
-                            Filter Operator [FIL_44] (rows=18262 width=1119)
-                              predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null)
-                              TableScan [TS_17] (rows=73049 width=1119)
-                                default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
-                      <-Reducer 3 [SIMPLE_EDGE]
-                        SHUFFLE [RS_20]
-                          PartitionCols:_col0
-                          Select Operator [SEL_16] (rows=316788826 width=135)
-                            Output:["_col0","_col2","_col7"]
-                            Filter Operator [FIL_15] (rows=316788826 width=135)
-                              predicate:((substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or (_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500))
-                              Merge Join Operator [MERGEJOIN_46] (rows=316788826 width=135)
-                                Conds:RS_12._col0=RS_13._col1(Inner),Output:["_col3","_col4","_col5","_col7"]
-                              <-Map 8 [SIMPLE_EDGE]
-                                SHUFFLE [RS_13]
-                                  PartitionCols:_col1
-                                  Select Operator [SEL_8] (rows=287989836 width=135)
-                                    Output:["_col0","_col1","_col2"]
-                                    Filter Operator [FIL_43] (rows=287989836 width=135)
-                                      predicate:(cs_bill_customer_sk is not null and cs_sold_date_sk is not null)
-                                      TableScan [TS_6] (rows=287989836 width=135)
-                                        default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"]
-                              <-Reducer 2 [SIMPLE_EDGE]
-                                SHUFFLE [RS_12]
-                                  PartitionCols:_col0
-                                  Merge Join Operator [MERGEJOIN_45] (rows=88000001 width=860)
-                                    Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col3","_col4"]
-                                  <-Map 1 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_9]
-                                      PartitionCols:_col1
-                                      Select Operator [SEL_2] (rows=80000000 width=860)
-                                        Output:["_col0","_col1"]
-                                        Filter Operator [FIL_41] (rows=80000000 width=860)
-                                          predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
-                                          TableScan [TS_0] (rows=80000000 width=860)
-                                            default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
-                                  <-Map 7 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_10]
-                                      PartitionCols:_col0
-                                      Select Operator [SEL_5] (rows=40000000 width=1014)
-                                        Output:["_col0","_col1","_col2"]
-                                        Filter Operator [FIL_42] (rows=40000000 width=1014)
-                                          predicate:ca_address_sk is not null
-                                          TableScan [TS_3] (rows=40000000 width=1014)
-                                            default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_zip"]
+                  Group By Operator [GBY_23] (rows=348467716 width=135)
+                    Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col4
+                    Merge Join Operator [MERGEJOIN_45] (rows=348467716 width=135)
+                      Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col4","_col7"]
+                    <-Reducer 2 [SIMPLE_EDGE]
+                      SHUFFLE [RS_19]
+                        PartitionCols:_col0
+                        Merge Join Operator [MERGEJOIN_43] (rows=88000001 width=860)
+                          Conds:RS_16._col1=RS_17._col0(Inner),Output:["_col0","_col4"]
+                        <-Map 1 [SIMPLE_EDGE]
+                          SHUFFLE [RS_16]
+                            PartitionCols:_col1
+                            Select Operator [SEL_2] (rows=80000000 width=860)
+                              Output:["_col0","_col1"]
+                              Filter Operator [FIL_39] (rows=80000000 width=860)
+                                predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
+                                TableScan [TS_0] (rows=80000000 width=860)
+                                  default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
+                        <-Map 6 [SIMPLE_EDGE]
+                          SHUFFLE [RS_17]
+                            PartitionCols:_col0
+                            Select Operator [SEL_5] (rows=40000000 width=1014)
+                              Output:["_col0","_col2"]
+                              Filter Operator [FIL_40] (rows=40000000 width=1014)
+                                predicate:(((substr(ca_zip, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or (ca_state) IN ('CA', 'WA', 'GA')) and ca_address_sk is not null)
+                                TableScan [TS_3] (rows=40000000 width=1014)
+                                  default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_zip"]
+                    <-Reducer 8 [SIMPLE_EDGE]
+                      SHUFFLE [RS_20]
+                        PartitionCols:_col1
+                        Merge Join Operator [MERGEJOIN_44] (rows=316788826 width=135)
+                          Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2"]
+                        <-Map 7 [SIMPLE_EDGE]
+                          SHUFFLE [RS_12]
+                            PartitionCols:_col0
+                            Select Operator [SEL_8] (rows=287989836 width=135)
+                              Output:["_col0","_col1","_col2"]
+                              Filter Operator [FIL_41] (rows=287989836 width=135)
+                                predicate:(cs_bill_customer_sk is not null and cs_sold_date_sk is not null)
+                                TableScan [TS_6] (rows=287989836 width=135)
+                                  default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"]
+                        <-Map 9 [SIMPLE_EDGE]
+                          SHUFFLE [RS_13]
+                            PartitionCols:_col0
+                            Select Operator [SEL_11] (rows=18262 width=1119)
+                              Output:["_col0"]
+                              Filter Operator [FIL_42] (rows=18262 width=1119)
+                                predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null)
+                                TableScan [TS_9] (rows=73049 width=1119)
+                                  default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
diff --git a/ql/src/test/results/clientpositive/perf/query16.q.out b/ql/src/test/results/clientpositive/perf/query16.q.out
index dceb3a5..3d6d805 100644
--- a/ql/src/test/results/clientpositive/perf/query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/query16.q.out
@@ -167,17 +167,17 @@ Stage-0
         <-Reducer 13 [SIMPLE_EDGE]
           SHUFFLE [RS_58]
             PartitionCols:_col0
-            Select Operator [SEL_18] (rows=7199720 width=106)
+            Select Operator [SEL_18] (rows=14399440 width=106)
               Output:["_col0","_col1"]
-              Group By Operator [GBY_17] (rows=7199720 width=106)
+              Group By Operator [GBY_17] (rows=14399440 width=106)
                 Output:["_col0"],keys:KEY._col0
            <-Map 12 [SIMPLE_EDGE]
              SHUFFLE [RS_16]
                PartitionCols:_col0
-                Group By Operator [GBY_15] (rows=14399440 width=106)
+                Group By Operator [GBY_15] (rows=28798881 width=106)
                   Output:["_col0"],keys:cr_order_number
-                  Filter Operator [FIL_103] (rows=14399440 width=106)
-                    predicate:(cr_order_number = cr_order_number)
+                  Filter Operator [FIL_103] (rows=28798881 width=106)
+                    predicate:cr_order_number is not null
                    TableScan [TS_12] (rows=28798881 width=106)
                      default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"]
            <-Reducer 4 [SIMPLE_EDGE]
@@ -228,7 +228,7 @@ Stage-0
                  Select Operator [SEL_5] (rows=8116 width=1119)
                    Output:["_col0"]
                    Filter Operator [FIL_100] (rows=8116 width=1119)
-                      predicate:(d_date BETWEEN '2001-4-01' AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
+                      predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
                      TableScan [TS_3] (rows=73049 width=1119)
                        default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
diff --git a/ql/src/test/results/clientpositive/perf/query21.q.out b/ql/src/test/results/clientpositive/perf/query21.q.out
index 67a49e2..292fc4f 100644
--- a/ql/src/test/results/clientpositive/perf/query21.q.out
+++ b/ql/src/test/results/clientpositive/perf/query21.q.out
@@ -133,7 +133,7 @@ Stage-0
                  Select Operator [SEL_5] (rows=8116 width=1119)
                    Output:["_col0","_col1"]
                    Filter Operator [FIL_42] (rows=8116 width=1119)
-                      predicate:(d_date BETWEEN 1998-03-08 23:00:00.0 AND 1998-05-08 00:00:00.0 and d_date_sk is not null)
+                      predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-08 23:00:00.0 AND 1998-05-08 00:00:00.0 and d_date_sk is not null)
                      TableScan [TS_3] (rows=73049 width=1119)
                        default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/query23.q.out
index 85cee23..a04e5cd 100644
--- a/ql/src/test/results/clientpositive/perf/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/query23.q.out
@@ -1,7 +1,5 @@
-Warning: Shuffle Join MERGEJOIN[379][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product
-Warning: Shuffle Join MERGEJOIN[380][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product
-Warning: Shuffle Join MERGEJOIN[382][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 51' is a cross product
-Warning: Shuffle Join MERGEJOIN[383][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 52' is a cross product
+Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 19' is a cross product
+Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 49' is a cross product
 PREHOOK: query: explain with frequent_ss_items as
 (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
  from store_sales
@@ -107,42 +105,38 @@ Plan optimized by CBO.
 Vertex dependency in root stage
 Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
 Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
-Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE)
-Reducer 16 <- Map 23 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
 Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
 Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE)
-Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 28 (CUSTOM_SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE)
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 33 (CUSTOM_SIMPLE_EDGE)
-Reducer 21 <- Reducer 20 (SIMPLE_EDGE)
-Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE)
-Reducer 26 <- Map 30 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE)
-Reducer 27 <- Reducer 26 (SIMPLE_EDGE)
-Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE)
+Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE)
+Reducer 24 <- Map 28 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
+Reducer 25 <- Reducer 24 (SIMPLE_EDGE)
+Reducer 26 <- Reducer 25 (CUSTOM_SIMPLE_EDGE)
 Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE)
-Reducer 33 <- Reducer 32 (SIMPLE_EDGE)
-Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE)
-Reducer 37 <- Reducer 36 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE)
-Reducer 38 <- Reducer 37 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Reducer 4 <- Reducer 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE)
-Reducer 42 <- Map 45 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE)
-Reducer 43 <- Reducer 42 (SIMPLE_EDGE)
-Reducer 47 <- Map 46 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE)
-Reducer 48 <- Map 55 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE)
-Reducer 49 <- Reducer 48 (SIMPLE_EDGE)
-Reducer 50 <- Reducer 49 (CUSTOM_SIMPLE_EDGE)
-Reducer 51 <- Reducer 50 (CUSTOM_SIMPLE_EDGE), Reducer 60 (CUSTOM_SIMPLE_EDGE)
-Reducer 52 <- Reducer 51 (CUSTOM_SIMPLE_EDGE), Reducer 65 (CUSTOM_SIMPLE_EDGE)
-Reducer 53 <- Reducer 52 (SIMPLE_EDGE)
-Reducer 57 <- Map 56 (SIMPLE_EDGE), Map 61 (SIMPLE_EDGE)
-Reducer 58 <- Map 62 (SIMPLE_EDGE), Reducer 57 (SIMPLE_EDGE)
-Reducer 59 <- Reducer 58 (SIMPLE_EDGE)
+Reducer 30 <- Map 29 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE)
+Reducer 31 <- Reducer 30 (SIMPLE_EDGE)
+Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE)
+Reducer 35 <- Reducer 34 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE)
+Reducer 36 <- Reducer 35 (SIMPLE_EDGE), Reducer 49 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 40 <- Map 43 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE)
+Reducer 41 <- Reducer 40 (SIMPLE_EDGE)
+Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE)
+Reducer 46 <- Map 51 (SIMPLE_EDGE), Reducer 45 (SIMPLE_EDGE)
+Reducer 47 <- Reducer 46 (SIMPLE_EDGE)
+Reducer 48 <- Reducer 47 (CUSTOM_SIMPLE_EDGE)
+Reducer 49 <- Reducer 48 (CUSTOM_SIMPLE_EDGE), Reducer 56 (CUSTOM_SIMPLE_EDGE), Reducer 61 (CUSTOM_SIMPLE_EDGE)
+Reducer 53 <- Map 52 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE)
+Reducer 54 <- Map 58 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE)
+Reducer 55 <- Reducer 54 (SIMPLE_EDGE)
+Reducer 56 <- Reducer 55 (CUSTOM_SIMPLE_EDGE)
 Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE)
-Reducer 60 <-
Reducer 59 (CUSTOM_SIMPLE_EDGE) -Reducer 64 <- Map 63 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE) -Reducer 65 <- Reducer 64 (SIMPLE_EDGE) +Reducer 60 <- Map 59 (SIMPLE_EDGE), Map 62 (SIMPLE_EDGE) +Reducer 61 <- Reducer 60 (SIMPLE_EDGE) Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 @@ -150,435 +144,413 @@ Stage-0 limit:100 Stage-1 Reducer 6 - File Output Operator [FS_258] - Limit [LIM_257] (rows=1 width=112) + File Output Operator [FS_246] + Limit [LIM_245] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_255] (rows=1 width=112) + Group By Operator [GBY_243] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] - <-Reducer 38 [CONTAINS] - Reduce Output Operator [RS_254] - Group By Operator [GBY_253] (rows=1 width=112) + <-Reducer 36 [CONTAINS] + Reduce Output Operator [RS_242] + Group By Operator [GBY_241] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_249] (rows=191667562 width=135) + Select Operator [SEL_237] (rows=191667562 width=135) Output:["_col0"] - Merge Join Operator [MERGEJOIN_384] (rows=191667562 width=135) - Conds:RS_246._col2=RS_247._col0(Inner),Output:["_col3","_col4"] - <-Reducer 37 [SIMPLE_EDGE] - SHUFFLE [RS_246] + Merge Join Operator [MERGEJOIN_370] (rows=191667562 width=135) + Conds:RS_234._col2=RS_235._col0(Inner),Output:["_col3","_col4"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_234] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_378] (rows=174243235 width=135) - Conds:RS_243._col1=RS_244._col0(Inner),Output:["_col2","_col3","_col4"] - <-Reducer 36 [SIMPLE_EDGE] - SHUFFLE [RS_243] + Merge Join Operator [MERGEJOIN_366] (rows=174243235 width=135) + Conds:RS_231._col1=RS_232._col0(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_231] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_369] (rows=158402938 width=135) - Conds:RS_240._col0=RS_241._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 35 [SIMPLE_EDGE] - SHUFFLE [RS_240] + Merge Join Operator [MERGEJOIN_357] (rows=158402938 width=135) + Conds:RS_228._col0=RS_229._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 33 [SIMPLE_EDGE] + SHUFFLE [RS_228] PartitionCols:_col0 - Select Operator [SEL_127] (rows=144002668 width=135) + Select Operator [SEL_121] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_346] (rows=144002668 width=135) + Filter Operator [FIL_334] (rows=144002668 width=135) predicate:(ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_125] (rows=144002668 width=135) + TableScan [TS_119] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] - <-Map 39 [SIMPLE_EDGE] - SHUFFLE [RS_241] + <-Map 37 [SIMPLE_EDGE] + SHUFFLE [RS_229] PartitionCols:_col0 - Select Operator [SEL_130] (rows=18262 width=1119) + Select Operator [SEL_124] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_347] (rows=18262 width=1119) + Filter Operator [FIL_335] (rows=18262 width=1119) predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) - TableScan [TS_128] (rows=73049 width=1119) + TableScan [TS_122] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 43 [SIMPLE_EDGE] - SHUFFLE [RS_244] + <-Reducer 41 [SIMPLE_EDGE] + SHUFFLE [RS_232] 
PartitionCols:_col0 - Group By Operator [GBY_156] (rows=58079562 width=88) + Group By Operator [GBY_150] (rows=58079562 width=88) Output:["_col0"],keys:_col1 - Select Operator [SEL_152] (rows=116159124 width=88) + Select Operator [SEL_146] (rows=116159124 width=88) Output:["_col1"] - Filter Operator [FIL_151] (rows=116159124 width=88) + Filter Operator [FIL_145] (rows=116159124 width=88) predicate:(_col3 > 4) - Select Operator [SEL_360] (rows=348477374 width=88) + Select Operator [SEL_348] (rows=348477374 width=88) Output:["_col0","_col3"] - Group By Operator [GBY_150] (rows=348477374 width=88) + Group By Operator [GBY_144] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_149] + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_143] PartitionCols:_col0 - Group By Operator [GBY_148] (rows=696954748 width=88) + Group By Operator [GBY_142] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_146] (rows=696954748 width=88) + Select Operator [SEL_140] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_371] (rows=696954748 width=88) - Conds:RS_143._col1=RS_144._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 45 [SIMPLE_EDGE] - SHUFFLE [RS_144] + Merge Join Operator [MERGEJOIN_359] (rows=696954748 width=88) + Conds:RS_137._col1=RS_138._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 43 [SIMPLE_EDGE] + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_139] (rows=462000 width=1436) + Select Operator [SEL_133] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_350] (rows=462000 width=1436) + Filter Operator [FIL_338] (rows=462000 width=1436) predicate:i_item_sk is not null - TableScan [TS_137] (rows=462000 width=1436) + TableScan [TS_131] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] - <-Reducer 41 [SIMPLE_EDGE] - SHUFFLE [RS_143] + <-Reducer 39 [SIMPLE_EDGE] + SHUFFLE [RS_137] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_370] (rows=633595212 width=88) - Conds:RS_140._col0=RS_141._col0(Inner),Output:["_col1","_col3"] - <-Map 40 [SIMPLE_EDGE] - SHUFFLE [RS_140] + Merge Join Operator [MERGEJOIN_358] (rows=633595212 width=88) + Conds:RS_134._col0=RS_135._col0(Inner),Output:["_col1","_col3"] + <-Map 38 [SIMPLE_EDGE] + SHUFFLE [RS_134] PartitionCols:_col0 - Select Operator [SEL_133] (rows=575995635 width=88) + Select Operator [SEL_127] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_348] (rows=575995635 width=88) + Filter Operator [FIL_336] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null) - TableScan [TS_131] (rows=575995635 width=88) + TableScan [TS_125] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] - <-Map 44 [SIMPLE_EDGE] - SHUFFLE [RS_141] + <-Map 42 [SIMPLE_EDGE] + SHUFFLE [RS_135] PartitionCols:_col0 - Select Operator [SEL_136] (rows=36525 width=1119) + Select Operator [SEL_130] (rows=36525 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_349] (rows=36525 width=1119) + Filter Operator [FIL_337] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_134] (rows=73049 width=1119) + TableScan [TS_128] (rows=73049 width=1119) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"] - <-Reducer 53 [SIMPLE_EDGE] - SHUFFLE [RS_247] + <-Reducer 49 [SIMPLE_EDGE] + SHUFFLE [RS_235] PartitionCols:_col0 - Group By Operator [GBY_238] (rows=52799601 width=322) - Output:["_col0"],keys:KEY._col0 - <-Reducer 52 [SIMPLE_EDGE] - SHUFFLE [RS_237] - PartitionCols:_col0 - Group By Operator [GBY_236] (rows=105599202 width=322) - Output:["_col0"],keys:_col2 - Select Operator [SEL_235] (rows=105599202 width=322) - Output:["_col2"] - Filter Operator [FIL_234] (rows=105599202 width=322) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_383] (rows=316797606 width=322) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 51 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_231] - Merge Join Operator [MERGEJOIN_382] (rows=1 width=233) - Conds:(Left Outer),Output:["_col1"] - <-Reducer 50 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_228] - Select Operator [SEL_186] (rows=1 width=8) - Filter Operator [FIL_185] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_183] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 49 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_182] - Group By Operator [GBY_181] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_178] (rows=348477374 width=88) - Group By Operator [GBY_177] (rows=348477374 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 48 [SIMPLE_EDGE] - SHUFFLE [RS_176] - PartitionCols:_col0 - Group By Operator [GBY_175] (rows=696954748 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_173] (rows=696954748 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_373] (rows=696954748 width=88) - Conds:RS_170._col1=RS_171._col0(Inner),Output:["_col6"] - <-Map 55 [SIMPLE_EDGE] - SHUFFLE [RS_171] - PartitionCols:_col0 - Select Operator [SEL_166] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_353] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_164] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 47 [SIMPLE_EDGE] - SHUFFLE [RS_170] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_372] (rows=633595212 width=88) - Conds:RS_167._col0=RS_168._col0(Inner),Output:["_col1"] - <-Map 46 [SIMPLE_EDGE] - SHUFFLE [RS_167] - PartitionCols:_col0 - Select Operator [SEL_160] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_351] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_158] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Map 54 [SIMPLE_EDGE] - SHUFFLE [RS_168] - PartitionCols:_col0 - Select Operator [SEL_163] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_352] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_161] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 60 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_229] - Group By Operator [GBY_211] (rows=1 width=224) - Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 59 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_210] - Group By Operator [GBY_209] (rows=1 width=224) - Output:["_col0"],aggregations:["max(_col1)"] - 
Select Operator [SEL_207] (rows=348477374 width=88) - Output:["_col1"] - Group By Operator [GBY_206] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 58 [SIMPLE_EDGE] - SHUFFLE [RS_205] - PartitionCols:_col0 - Group By Operator [GBY_204] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_202] (rows=696954748 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_375] (rows=696954748 width=88) - Conds:RS_199._col1=RS_200._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 62 [SIMPLE_EDGE] - SHUFFLE [RS_200] - PartitionCols:_col0 - Select Operator [SEL_195] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_356] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_193] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 57 [SIMPLE_EDGE] - SHUFFLE [RS_199] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_374] (rows=633595212 width=88) - Conds:RS_196._col0=RS_197._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 56 [SIMPLE_EDGE] - SHUFFLE [RS_196] - PartitionCols:_col0 - Select Operator [SEL_189] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_354] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_187] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 61 [SIMPLE_EDGE] - SHUFFLE [RS_197] - PartitionCols:_col0 - Select Operator [SEL_192] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_355] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_190] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 65 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_232] - Group By Operator [GBY_226] (rows=316797606 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 64 [SIMPLE_EDGE] - SHUFFLE [RS_225] - PartitionCols:_col0 - Group By Operator [GBY_224] (rows=633595212 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_222] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_376] (rows=633595212 width=88) - Conds:RS_219._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 63 [SIMPLE_EDGE] - SHUFFLE [RS_219] - PartitionCols:_col0 - Select Operator [SEL_215] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_357] (rows=575995635 width=88) - predicate:ss_customer_sk is not null - TableScan [TS_213] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 66 [SIMPLE_EDGE] - SHUFFLE [RS_220] - PartitionCols:_col0 - Select Operator [SEL_218] (rows=80000000 width=860) + Select Operator [SEL_227] (rows=105599202 width=321) + Output:["_col0"] + Filter Operator [FIL_226] (rows=105599202 width=321) + predicate:(_col3 > (0.95 * _col1)) + Merge Join Operator [MERGEJOIN_369] (rows=316797606 width=321) + Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 48 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_222] + Select Operator 
[SEL_180] (rows=1 width=8) + Filter Operator [FIL_179] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_177] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 47 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_176] + Group By Operator [GBY_175] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_172] (rows=348477374 width=88) + Group By Operator [GBY_171] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_170] + PartitionCols:_col0 + Group By Operator [GBY_169] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_167] (rows=696954748 width=88) Output:["_col0"] - Filter Operator [FIL_358] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_216] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + Merge Join Operator [MERGEJOIN_361] (rows=696954748 width=88) + Conds:RS_164._col1=RS_165._col0(Inner),Output:["_col6"] + <-Map 51 [SIMPLE_EDGE] + SHUFFLE [RS_165] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_341] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_158] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 45 [SIMPLE_EDGE] + SHUFFLE [RS_164] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_360] (rows=633595212 width=88) + Conds:RS_161._col0=RS_162._col0(Inner),Output:["_col1"] + <-Map 44 [SIMPLE_EDGE] + SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_154] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_339] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_152] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 50 [SIMPLE_EDGE] + SHUFFLE [RS_162] + PartitionCols:_col0 + Select Operator [SEL_157] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_340] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_155] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 56 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_223] + Group By Operator [GBY_205] (rows=1 width=224) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 55 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_204] + Group By Operator [GBY_203] (rows=1 width=224) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_201] (rows=348477374 width=88) + Output:["_col1"] + Group By Operator [GBY_200] (rows=348477374 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 54 [SIMPLE_EDGE] + SHUFFLE [RS_199] + PartitionCols:_col0 + Group By Operator [GBY_198] (rows=696954748 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_196] (rows=696954748 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_363] (rows=696954748 width=88) + Conds:RS_193._col1=RS_194._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 58 [SIMPLE_EDGE] + SHUFFLE [RS_194] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator 
[FIL_344] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_187] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_193] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_362] (rows=633595212 width=88) + Conds:RS_190._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 52 [SIMPLE_EDGE] + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_183] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_342] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_181] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 57 [SIMPLE_EDGE] + SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_186] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_343] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_184] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 61 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_224] + Group By Operator [GBY_220] (rows=316797606 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 60 [SIMPLE_EDGE] + SHUFFLE [RS_219] + PartitionCols:_col0 + Group By Operator [GBY_218] (rows=633595212 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_216] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_364] (rows=633595212 width=88) + Conds:RS_213._col0=RS_214._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 59 [SIMPLE_EDGE] + SHUFFLE [RS_213] + PartitionCols:_col0 + Select Operator [SEL_209] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_345] (rows=575995635 width=88) + predicate:ss_customer_sk is not null + TableScan [TS_207] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 62 [SIMPLE_EDGE] + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_212] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_346] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_210] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_254] - Group By Operator [GBY_253] (rows=1 width=112) + Reduce Output Operator [RS_242] + Group By Operator [GBY_241] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_124] (rows=383314495 width=135) + Select Operator [SEL_118] (rows=383314495 width=135) Output:["_col0"] - Merge Join Operator [MERGEJOIN_381] (rows=383314495 width=135) - Conds:RS_121._col1=RS_122._col0(Inner),Output:["_col3","_col4"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_122] + Merge Join Operator [MERGEJOIN_368] (rows=383314495 width=135) + Conds:RS_115._col1=RS_116._col0(Inner),Output:["_col3","_col4"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_116] PartitionCols:_col0 - Group By Operator [GBY_113] (rows=52799601 width=322) - Output:["_col0"],keys:KEY._col0 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_112] - PartitionCols:_col0 - Group By 
Operator [GBY_111] (rows=105599202 width=322) - Output:["_col0"],keys:_col2 - Select Operator [SEL_110] (rows=105599202 width=322) - Output:["_col2"] - Filter Operator [FIL_109] (rows=105599202 width=322) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_380] (rows=316797606 width=322) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_106] - Merge Join Operator [MERGEJOIN_379] (rows=1 width=233) - Conds:(Left Outer),Output:["_col1"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_103] - Select Operator [SEL_61] (rows=1 width=8) - Filter Operator [FIL_60] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_58] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_57] - Group By Operator [GBY_56] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_53] (rows=348477374 width=88) - Group By Operator [GBY_52] (rows=348477374 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col0 - Group By Operator [GBY_50] (rows=696954748 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_48] (rows=696954748 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_365] (rows=696954748 width=88) - Conds:RS_45._col1=RS_46._col0(Inner),Output:["_col6"] - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col0 - Select Operator [SEL_41] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_340] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_39] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_364] (rows=633595212 width=88) - Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_35] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_338] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_33] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Map 22 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Select Operator [SEL_38] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_339] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_36] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_104] - Group By Operator [GBY_86] (rows=1 width=224) - Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_85] - Group By Operator [GBY_84] (rows=1 width=224) - Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_82] (rows=348477374 width=88) - Output:["_col1"] - Group By Operator [GBY_81] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_80] - PartitionCols:_col0 - Group By Operator [GBY_79] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select 
Operator [SEL_77] (rows=696954748 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_367] (rows=696954748 width=88) - Conds:RS_74._col1=RS_75._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 30 [SIMPLE_EDGE] - SHUFFLE [RS_75] - PartitionCols:_col0 - Select Operator [SEL_70] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_343] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_68] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_74] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_366] (rows=633595212 width=88) - Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 24 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col0 - Select Operator [SEL_64] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_341] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_62] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 29 [SIMPLE_EDGE] - SHUFFLE [RS_72] - PartitionCols:_col0 - Select Operator [SEL_67] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_342] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_65] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_107] - Group By Operator [GBY_101] (rows=316797606 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_100] - PartitionCols:_col0 - Group By Operator [GBY_99] (rows=633595212 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_97] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_368] (rows=633595212 width=88) - Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col0 - Select Operator [SEL_90] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_344] (rows=575995635 width=88) - predicate:ss_customer_sk is not null - TableScan [TS_88] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 34 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col0 - Select Operator [SEL_93] (rows=80000000 width=860) + Select Operator [SEL_108] (rows=105599202 width=321) + Output:["_col0"] + Filter Operator [FIL_107] (rows=105599202 width=321) + predicate:(_col3 > (0.95 * _col1)) + Merge Join Operator [MERGEJOIN_367] (rows=316797606 width=321) + Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_103] + Select Operator [SEL_61] (rows=1 width=8) + Filter Operator [FIL_60] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_58] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_57] + Group By Operator [GBY_56] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_53] (rows=348477374 width=88) + Group 
By Operator [GBY_52] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col0 + Group By Operator [GBY_50] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_48] (rows=696954748 width=88) Output:["_col0"] - Filter Operator [FIL_345] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_91] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + Merge Join Operator [MERGEJOIN_353] (rows=696954748 width=88) + Conds:RS_45._col1=RS_46._col0(Inner),Output:["_col6"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Select Operator [SEL_41] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_328] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_39] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_352] (rows=633595212 width=88) + Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_35] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_326] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_33] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Select Operator [SEL_38] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_327] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_36] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 26 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_104] + Group By Operator [GBY_86] (rows=1 width=224) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 25 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_85] + Group By Operator [GBY_84] (rows=1 width=224) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_82] (rows=348477374 width=88) + Output:["_col1"] + Group By Operator [GBY_81] (rows=348477374 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col0 + Group By Operator [GBY_79] (rows=696954748 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_77] (rows=696954748 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_355] (rows=696954748 width=88) + Conds:RS_74._col1=RS_75._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 28 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_331] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_68] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_354] (rows=633595212 width=88) + Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 22 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + 
Select Operator [SEL_64] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_329] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_62] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col0 + Select Operator [SEL_67] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_330] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_65] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_105] + Group By Operator [GBY_101] (rows=316797606 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_100] + PartitionCols:_col0 + Group By Operator [GBY_99] (rows=633595212 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_97] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_356] (rows=633595212 width=88) + Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col0 + Select Operator [SEL_90] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_332] (rows=575995635 width=88) + predicate:ss_customer_sk is not null + TableScan [TS_88] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 32 [SIMPLE_EDGE] + SHUFFLE [RS_95] + PartitionCols:_col0 + Select Operator [SEL_93] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_333] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_91] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_121] + SHUFFLE [RS_115] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_377] (rows=348467716 width=135) - Conds:RS_118._col2=RS_119._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_365] (rows=348467716 width=135) + Conds:RS_112._col2=RS_113._col0(Inner),Output:["_col1","_col3","_col4"] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_119] + SHUFFLE [RS_113] PartitionCols:_col0 Group By Operator [GBY_31] (rows=58079562 width=88) Output:["_col0"],keys:_col1 @@ -586,7 +558,7 @@ Stage-0 Output:["_col1"] Filter Operator [FIL_26] (rows=116159124 width=88) predicate:(_col3 > 4) - Select Operator [SEL_359] (rows=348477374 width=88) + Select Operator [SEL_347] (rows=348477374 width=88) Output:["_col0","_col3"] Group By Operator [GBY_25] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 @@ -597,28 +569,28 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 Select Operator [SEL_21] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_363] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_351] (rows=696954748 width=88) Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col5","_col6"] <-Map 13 [SIMPLE_EDGE] SHUFFLE [RS_19] 
PartitionCols:_col0 Select Operator [SEL_14] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_337] (rows=462000 width=1436) + Filter Operator [FIL_325] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_362] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_350] (rows=633595212 width=88) Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col3"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 Select Operator [SEL_11] (rows=36525 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_336] (rows=36525 width=1119) + Filter Operator [FIL_324] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"] @@ -627,30 +599,30 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_8] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_335] (rows=575995635 width=88) + Filter Operator [FIL_323] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_118] + SHUFFLE [RS_112] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_361] (rows=316788826 width=135) - Conds:RS_115._col0=RS_116._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_349] (rows=316788826 width=135) + Conds:RS_109._col0=RS_110._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_115] + SHUFFLE [RS_109] PartitionCols:_col0 Select Operator [SEL_2] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_333] (rows=287989836 width=135) + Filter Operator [FIL_321] (rows=287989836 width=135) predicate:(cs_item_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_116] + SHUFFLE [RS_110] PartitionCols:_col0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_334] (rows=18262 width=1119) + Filter Operator [FIL_322] (rows=18262 width=1119) predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] diff --git a/ql/src/test/results/clientpositive/perf/query30.q.out b/ql/src/test/results/clientpositive/perf/query30.q.out index 3bb4f81..f8b5ced 100644 --- a/ql/src/test/results/clientpositive/perf/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/query30.q.out @@ -98,30 +98,30 @@ Stage-0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col2 - Select Operator [SEL_50] (rows=5500000 width=1014) + Select Operator [SEL_50] (rows=11000000 width=1014) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_49] (rows=5500000 width=1014) + Group By Operator [GBY_49] (rows=11000000 width=1014) 
Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 - Select Operator [SEL_45] (rows=11000000 width=1014) + Select Operator [SEL_45] (rows=22000000 width=1014) Output:["_col0","_col2"] - Group By Operator [GBY_44] (rows=11000000 width=1014) + Group By Operator [GBY_44] (rows=22000000 width=1014) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Group By Operator [GBY_42] (rows=22000000 width=1014) + Group By Operator [GBY_42] (rows=44000000 width=1014) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Select Operator [SEL_41] (rows=22000000 width=1014) + Select Operator [SEL_41] (rows=44000000 width=1014) Output:["_col7","_col1","_col3"] - Merge Join Operator [MERGEJOIN_103] (rows=22000000 width=1014) + Merge Join Operator [MERGEJOIN_103] (rows=44000000 width=1014) Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] <-Map 18 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col0 - Select Operator [SEL_34] (rows=20000000 width=1014) + Select Operator [SEL_34] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_98] (rows=20000000 width=1014) - predicate:((ca_state = ca_state) and ca_address_sk is not null) + Filter Operator [FIL_98] (rows=40000000 width=1014) + predicate:(ca_address_sk is not null and ca_state is not null) TableScan [TS_32] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] <-Reducer 14 [SIMPLE_EDGE] diff --git a/ql/src/test/results/clientpositive/perf/query37.q.out b/ql/src/test/results/clientpositive/perf/query37.q.out index 1b61198..1c4f715 100644 --- a/ql/src/test/results/clientpositive/perf/query37.q.out +++ b/ql/src/test/results/clientpositive/perf/query37.q.out @@ -97,7 +97,7 @@ Stage-0 Select Operator [SEL_11] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_42] (rows=8116 width=1119) - predicate:(d_date BETWEEN 2001-06-02 AND 2001-08-01 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-06-02 00:00:00.0 AND 2001-08-01 00:00:00.0 and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/perf/query40.q.out b/ql/src/test/results/clientpositive/perf/query40.q.out index a45fdaf..919c5c2 100644 --- a/ql/src/test/results/clientpositive/perf/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/query40.q.out @@ -119,7 +119,7 @@ Stage-0 Select Operator [SEL_8] (rows=8116 width=1119) Output:["_col0","_col1"] Filter Operator [FIL_52] (rows=8116 width=1119) - predicate:(d_date BETWEEN 1998-03-08 23:00:00.0 AND 1998-05-08 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-08 23:00:00.0 AND 1998-05-08 00:00:00.0 and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Reducer 2 [SIMPLE_EDGE] diff --git a/ql/src/test/results/clientpositive/perf/query5.q.out b/ql/src/test/results/clientpositive/perf/query5.q.out index d3ec922..c1e4ec1 100644 --- a/ql/src/test/results/clientpositive/perf/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/query5.q.out @@ -328,7 +328,7 @@ Stage-0 Select Operator [SEL_35] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_125] (rows=8116 width=1119) - 
predicate:(d_date BETWEEN 1998-08-04 AND 1998-08-18 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-08-18 00:00:00.0 and d_date_sk is not null) TableScan [TS_33] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Union 13 [SIMPLE_EDGE] @@ -388,7 +388,7 @@ Stage-0 Select Operator [SEL_69] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_130] (rows=8116 width=1119) - predicate:(d_date BETWEEN 1998-08-04 AND 1998-08-18 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-08-18 00:00:00.0 and d_date_sk is not null) TableScan [TS_67] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Union 21 [SIMPLE_EDGE] @@ -464,7 +464,7 @@ Stage-0 Select Operator [SEL_10] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_121] (rows=8116 width=1119) - predicate:(d_date BETWEEN 1998-08-04 AND 1998-08-18 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-08-18 00:00:00.0 and d_date_sk is not null) TableScan [TS_8] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Union 2 [SIMPLE_EDGE] diff --git a/ql/src/test/results/clientpositive/perf/query6.q.out b/ql/src/test/results/clientpositive/perf/query6.q.out index ca17206..02fdf00 100644 --- a/ql/src/test/results/clientpositive/perf/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/query6.q.out @@ -213,19 +213,19 @@ Stage-0 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2 - Select Operator [SEL_9] (rows=115500 width=1436) + Select Operator [SEL_9] (rows=231000 width=1436) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=115500 width=1436) + Group By Operator [GBY_8] (rows=231000 width=1436) Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] SHUFFLE [RS_7] PartitionCols:_col0 - Group By Operator [GBY_6] (rows=231000 width=1436) + Group By Operator [GBY_6] (rows=462000 width=1436) Output:["_col0","_col1"],aggregations:["avg(i_current_price)"],keys:i_category - Select Operator [SEL_5] (rows=231000 width=1436) + Select Operator [SEL_5] (rows=462000 width=1436) Output:["i_category","i_current_price"] - Filter Operator [FIL_101] (rows=231000 width=1436) - predicate:(i_category = i_category) + Filter Operator [FIL_101] (rows=462000 width=1436) + predicate:i_category is not null TableScan [TS_3] (rows=462000 width=1436) default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"] diff --git a/ql/src/test/results/clientpositive/perf/query69.q.out b/ql/src/test/results/clientpositive/perf/query69.q.out index 90224ce..f9e3fdd 100644 --- a/ql/src/test/results/clientpositive/perf/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/query69.q.out @@ -91,15 +91,15 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 17 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) @@ -112,46 +112,46 @@ Stage-0 File Output Operator [FS_76] Limit [LIM_75] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_74] (rows=52707204 width=88) + Select Operator [SEL_74] (rows=95831279 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_73] - Select Operator [SEL_72] (rows=52707204 width=88) + Select Operator [SEL_72] (rows=95831279 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Group By Operator [GBY_71] (rows=52707204 width=88) + Group By Operator [GBY_71] (rows=95831279 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_70] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_69] (rows=105414409 width=88) + Group By Operator [GBY_69] (rows=191662559 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 - Select Operator [SEL_68] (rows=105414409 width=88) + Select Operator [SEL_68] (rows=191662559 width=88) Output:["_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_67] (rows=105414409 width=88) + Filter Operator [FIL_67] (rows=191662559 width=88) predicate:_col15 is null - Merge Join Operator [MERGEJOIN_117] (rows=210828818 width=88) + Merge Join Operator [MERGEJOIN_114] (rows=383325119 width=88) Conds:RS_64._col0=RS_65._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col15"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_63] (rows=79197206 width=135) + Select Operator [SEL_63] (rows=158394413 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_62] (rows=79197206 width=135) + Group By Operator [GBY_62] (rows=158394413 width=135) Output:["_col0"],keys:KEY._col0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_61] PartitionCols:_col0 - Group By Operator [GBY_60] (rows=158394413 width=135) + Group By Operator [GBY_60] (rows=316788826 width=135) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_116] (rows=158394413 width=135) + Merge Join Operator [MERGEJOIN_112] (rows=316788826 width=135) Conds:RS_56._col0=RS_57._col0(Inner),Output:["_col1"] <-Map 18 [SIMPLE_EDGE] SHUFFLE [RS_56] PartitionCols:_col0 - Select Operator [SEL_52] (rows=143994918 width=135) + Select Operator [SEL_52] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_109] 
(rows=143994918 width=135) - predicate:((cs_ship_customer_sk = cs_ship_customer_sk) and cs_sold_date_sk is not null) + Filter Operator [FIL_106] (rows=287989836 width=135) + predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_50] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Map 21 [SIMPLE_EDGE] @@ -159,108 +159,108 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_55] (rows=4058 width=1119) Output:["_col0"] - Filter Operator [FIL_110] (rows=4058 width=1119) + Filter Operator [FIL_107] (rows=4058 width=1119) predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) TableScan [TS_53] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_49] (rows=191662558 width=88) + Select Operator [SEL_49] (rows=348477374 width=88) Output:["_col0","_col10","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_48] (rows=191662558 width=88) + Filter Operator [FIL_48] (rows=348477374 width=88) predicate:_col13 is null - Select Operator [SEL_47] (rows=383325116 width=88) + Select Operator [SEL_47] (rows=696954748 width=88) Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col13"] - Merge Join Operator [MERGEJOIN_115] (rows=383325116 width=88) - Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col0","_col7","_col9","_col10","_col11","_col12","_col13"] - <-Map 17 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_113] (rows=696954748 width=88) + Conds:RS_43._col0=RS_44._col0(Left Outer),RS_43._col0=RS_45._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Select Operator [SEL_22] (rows=79201469 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_21] (rows=79201469 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_20] + PartitionCols:_col0 + Group By Operator [GBY_19] (rows=158402938 width=135) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_110] (rows=158402938 width=135) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_102] (rows=144002668 width=135) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_9] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=4058 width=1119) + Output:["_col0"] + Filter Operator [FIL_103] (rows=4058 width=1119) + predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 - Select Operator [SEL_36] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_108] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_34] (rows=1861800 width=385) - 
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_114] (rows=348477371 width=88) - Conds:RS_40._col0=RS_41._col0(Inner),RS_40._col0=RS_42._col0(Left Outer),Output:["_col0","_col1","_col7"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Group By Operator [GBY_18] (rows=158398802 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=316797605 width=88) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_112] (rows=316797605 width=88) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=4058 width=1119) - Output:["_col0"] - Filter Operator [FIL_105] (rows=4058 width=1119) - predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=287997817 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_104] (rows=287997817 width=88) - predicate:((ss_customer_sk = ss_customer_sk) and ss_sold_date_sk is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + Group By Operator [GBY_35] (rows=316797606 width=88) + Output:["_col0"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_42] + SHUFFLE [RS_34] PartitionCols:_col0 - Select Operator [SEL_33] (rows=39600734 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_32] (rows=39600734 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_31] + Group By Operator [GBY_33] (rows=633595212 width=88) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_111] (rows=633595212 width=88) + Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col0 - Group By Operator [GBY_30] (rows=79201469 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_113] (rows=79201469 width=135) - Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Select Operator [SEL_22] (rows=72001334 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_106] (rows=72001334 width=135) - predicate:((ws_bill_customer_sk = ws_bill_customer_sk) and ws_sold_date_sk is not null) - TableScan [TS_20] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_25] (rows=4058 width=1119) - Output:["_col0"] - Filter Operator [FIL_107] (rows=4058 width=1119) - predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) - TableScan [TS_23] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + Select Operator [SEL_25] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_104] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null 
and ss_sold_date_sk is not null) + TableScan [TS_23] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=4058 width=1119) + Output:["_col0"] + Filter Operator [FIL_105] (rows=4058 width=1119) + predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) + TableScan [TS_26] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_109] (rows=96800003 width=860) + Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1861800 width=385) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_101] (rows=1861800 width=385) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_40] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_111] (rows=88000001 width=860) + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_108] (rows=88000001 width=860) Conds:RS_37._col2=RS_38._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col2 Select Operator [SEL_2] (rows=80000000 width=860) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_102] (rows=80000000 width=860) + Filter Operator [FIL_99] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) TableScan [TS_0] (rows=80000000 width=860) default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] @@ -269,7 +269,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_103] (rows=20000000 width=1014) + Filter Operator [FIL_100] (rows=20000000 width=1014) predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) TableScan [TS_3] (rows=40000000 width=1014) default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/query70.q.out index bf90cdd..cfab832 100644 --- a/ql/src/test/results/clientpositive/perf/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/query70.q.out @@ -91,119 +91,117 @@ Stage-0 limit:100 Stage-1 Reducer 7 - File Output Operator [FS_62] - Limit [LIM_61] (rows=100 width=88) + File Output Operator [FS_60] + Limit [LIM_59] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_60] (rows=1149975358 width=88) + Select Operator [SEL_58] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_59] - Select Operator [SEL_57] (rows=1149975358 width=88) + SHUFFLE [RS_57] + Select Operator [SEL_55] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_56] (rows=1149975358 width=88) + PTF Operator [PTF_54] (rows=1149975358 width=88) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS 
LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END"}] - Select Operator [SEL_55] (rows=1149975358 width=88) + Select Operator [SEL_53] (rows=1149975358 width=88) Output:["_col0","_col1","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_54] + SHUFFLE [RS_52] PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END - Select Operator [SEL_53] (rows=1149975358 width=88) + Select Operator [SEL_51] (rows=1149975358 width=88) Output:["_col0","_col1","_col4","_col5"] - Group By Operator [GBY_52] (rows=1149975358 width=88) + Group By Operator [GBY_50] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_51] + SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_50] (rows=2299950717 width=88) + Group By Operator [GBY_48] (rows=2299950717 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0 - Select Operator [SEL_48] (rows=766650239 width=88) + Select Operator [SEL_46] (rows=766650239 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_90] (rows=766650239 width=88) - Conds:RS_45._col7=RS_46._col0(Left Semi),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_88] (rows=766650239 width=88) + Conds:RS_43._col7=RS_44._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_46] + SHUFFLE [RS_44] PartitionCols:_col0 - Group By Operator [GBY_44] (rows=116159124 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_32] (rows=116159124 width=88) - Output:["_col0"] - Filter Operator [FIL_82] (rows=116159124 width=88) - predicate:(rank_window_0 <= 5) - PTF Operator [PTF_31] (rows=348477374 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_30] (rows=348477374 width=88) - Output:["_col0","_col1"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Group By Operator [GBY_27] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Group By Operator [GBY_25] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6 - Select Operator [SEL_24] (rows=696954748 width=88) - Output:["_col6","_col2"] - Merge Join Operator [MERGEJOIN_89] (rows=696954748 width=88) - Conds:RS_21._col1=RS_22._col0(Inner),Output:["_col2","_col6"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_17] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_85] (rows=1704 width=1910) - predicate:(s_store_sk is not null and s_state is not null) - TableScan [TS_15] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_88] (rows=633595212 width=88) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_83] (rows=575995635 width=88) - 
predicate:(ss_store_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_9] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_84] (rows=8116 width=1119) - predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + Select Operator [SEL_32] (rows=116159124 width=88) + Output:["_col0"] + Filter Operator [FIL_80] (rows=116159124 width=88) + predicate:(rank_window_0 <= 5) + PTF Operator [PTF_31] (rows=348477374 width=88) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] + Select Operator [SEL_30] (rows=348477374 width=88) + Output:["_col0","_col1"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_27] (rows=348477374 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Group By Operator [GBY_25] (rows=696954748 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6 + Select Operator [SEL_24] (rows=696954748 width=88) + Output:["_col6","_col2"] + Merge Join Operator [MERGEJOIN_87] (rows=696954748 width=88) + Conds:RS_21._col1=RS_22._col0(Inner),Output:["_col2","_col6"] + <-Map 16 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_83] (rows=1704 width=1910) + predicate:(s_store_sk is not null and s_state is not null) + TableScan [TS_15] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_86] (rows=633595212 width=88) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_81] (rows=575995635 width=88) + predicate:(ss_store_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_82] (rows=8116 width=1119) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_45] + SHUFFLE [RS_43] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_87] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_85] (rows=696954748 width=88) Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col2","_col6","_col7"] <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col0 Select Operator [SEL_8] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_81] (rows=1704 width=1910) + Filter Operator [FIL_79] (rows=1704 width=1910) predicate:(s_state is not 
null and s_store_sk is not null) TableScan [TS_6] (rows=1704 width=1910) default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_86] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_84] (rows=633595212 width=88) Conds:RS_37._col0=RS_38._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 Select Operator [SEL_2] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_79] (rows=575995635 width=88) + Filter Operator [FIL_77] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] @@ -212,7 +210,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_80] (rows=8116 width=1119) + Filter Operator [FIL_78] (rows=8116 width=1119) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/query80.q.out b/ql/src/test/results/clientpositive/perf/query80.q.out index 7338d9c..be7ecda 100644 --- a/ql/src/test/results/clientpositive/perf/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/query80.q.out @@ -295,7 +295,7 @@ Stage-0 Select Operator [SEL_47] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_194] (rows=8116 width=1119) - predicate:(d_date BETWEEN 1998-08-04 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) TableScan [TS_45] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Reducer 17 [SIMPLE_EDGE] @@ -387,7 +387,7 @@ Stage-0 Select Operator [SEL_88] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_200] (rows=8116 width=1119) - predicate:(d_date BETWEEN 1998-08-04 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) TableScan [TS_86] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Reducer 29 [SIMPLE_EDGE] @@ -479,7 +479,7 @@ Stage-0 Select Operator [SEL_8] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_188] (rows=8116 width=1119) - predicate:(d_date BETWEEN 1998-08-04 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-08-04 00:00:00.0 AND 1998-09-03 00:00:00.0 and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Reducer 2 [SIMPLE_EDGE] diff --git a/ql/src/test/results/clientpositive/perf/query81.q.out b/ql/src/test/results/clientpositive/perf/query81.q.out index 25bd68e..b2317f8 100644 --- a/ql/src/test/results/clientpositive/perf/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/query81.q.out @@ -98,30 +98,30 @@ Stage-0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col2 - Select Operator [SEL_50] (rows=8711661 width=106) + Select Operator [SEL_50] (rows=11000000 width=1014) 
Output:["_col0","_col1","_col2"] - Group By Operator [GBY_49] (rows=8711661 width=106) + Group By Operator [GBY_49] (rows=11000000 width=1014) Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 - Select Operator [SEL_45] (rows=17423323 width=106) + Select Operator [SEL_45] (rows=22000000 width=1014) Output:["_col0","_col2"] - Group By Operator [GBY_44] (rows=17423323 width=106) + Group By Operator [GBY_44] (rows=22000000 width=1014) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Group By Operator [GBY_42] (rows=34846646 width=106) + Group By Operator [GBY_42] (rows=44000000 width=1014) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Select Operator [SEL_41] (rows=34846646 width=106) + Select Operator [SEL_41] (rows=44000000 width=1014) Output:["_col7","_col1","_col3"] - Merge Join Operator [MERGEJOIN_103] (rows=34846646 width=106) + Merge Join Operator [MERGEJOIN_103] (rows=44000000 width=1014) Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] <-Map 18 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col0 - Select Operator [SEL_34] (rows=20000000 width=1014) + Select Operator [SEL_34] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_98] (rows=20000000 width=1014) - predicate:((ca_state = ca_state) and ca_address_sk is not null) + Filter Operator [FIL_98] (rows=40000000 width=1014) + predicate:(ca_address_sk is not null and ca_state is not null) TableScan [TS_32] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] <-Reducer 14 [SIMPLE_EDGE] diff --git a/ql/src/test/results/clientpositive/perf/query82.q.out b/ql/src/test/results/clientpositive/perf/query82.q.out index 9c0042c..881070e 100644 --- a/ql/src/test/results/clientpositive/perf/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/query82.q.out @@ -97,7 +97,7 @@ Stage-0 Select Operator [SEL_11] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_42] (rows=8116 width=1119) - predicate:(d_date BETWEEN 2002-05-30 AND 2002-07-29 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2002-05-30 00:00:00.0 AND 2002-07-29 00:00:00.0 and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/perf/query98.q.out b/ql/src/test/results/clientpositive/perf/query98.q.out index 1bae9be..284e70d 100644 --- a/ql/src/test/results/clientpositive/perf/query98.q.out +++ b/ql/src/test/results/clientpositive/perf/query98.q.out @@ -82,7 +82,7 @@ Stage-0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_34] (rows=8116 width=1119) - predicate:(d_date BETWEEN 2001-01-12 AND 2001-02-11 00:00:00.0 and d_date_sk is not null) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-01-12 00:00:00.0 AND 2001-02-11 00:00:00.0 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/ppd_outer_join1.q.out b/ql/src/test/results/clientpositive/ppd_outer_join1.q.out index 8ec267c..f4d8089 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join1.q.out @@ -28,53 +28,50 @@ STAGE PLANS: alias: 
a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -138,53 +135,50 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out b/ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out index 
8e35d1b..c6e85d6 100644 --- a/ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out +++ b/ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out @@ -544,11 +544,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: value (type: string) + keys: key (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -592,17 +592,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 62500 Data size: 1390500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 62500 Data size: 1390500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -611,17 +606,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: -+ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 62500 Data size: 1390500 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 31250 Data size: 695250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 62500 Data size: 1390500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 31250 Data size: 695250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 62500 Data size: 1390500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -634,11 +627,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: key (type: string) + keys: value (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/router_join_ppr.q.out b/ql/src/test/results/clientpositive/router_join_ppr.q.out index cc2b07e..f17959b 100644 --- a/ql/src/test/results/clientpositive/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/router_join_ppr.q.out @@ 
-30,41 +30,41 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false TableScan alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col1 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -211,149 +211,42 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart Truncated Path -> Alias: /src [$hdt$_0:a] /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(VALUE._col1 = '2008-04-08')} + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -373,8 +266,6 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: FROM src a @@ -388,8 +279,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 17 val_17 17 val_17 17 val_17 17 val_17 @@ -435,18 +324,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -456,18 +345,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - 
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -624,42 +513,38 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -737,18 +622,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -758,18 +643,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -926,38 +811,34 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/semijoin5.q.out b/ql/src/test/results/clientpositive/semijoin5.q.out index fd8e372..db4f551 100644 --- a/ql/src/test/results/clientpositive/semijoin5.q.out +++ b/ql/src/test/results/clientpositive/semijoin5.q.out @@ -255,7 +255,7 @@ STAGE PLANS: alias: tt2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((timestamp_col_18 = timestamp_col_18) and decimal1911_col_16 is not null) (type: boolean) + predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp) diff --git a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index 3b0d96c..602ccb2 100644 --- a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -898,7 +898,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key + 1) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key + 1) (type: int) @@ -915,7 +915,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key + 1) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key + 1) (type: int) diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index 2b09fcf..cdb69db 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -1033,7 +1033,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key + 1) is not null (type: boolean) + predicate: key is not null (type: boolean) 
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key + 1) (type: int) @@ -1058,7 +1058,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key + 1) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key + 1) (type: int) diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out index 89c2735..be33189 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out @@ -446,7 +446,7 @@ STAGE PLANS: alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((id = 100) and (id = 100) is not null) (type: boolean) + predicate: (id = 100) (type: boolean) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 100 (type: int), true (type: boolean) @@ -517,7 +517,7 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((dimid = 100) and (dimid = 100) is not null) (type: boolean) + predicate: (dimid = 100) (type: boolean) Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), val (type: string), val1 (type: string) @@ -535,7 +535,7 @@ STAGE PLANS: alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((id = 100) and (id = 100) is not null) (type: boolean) + predicate: (id = 100) (type: boolean) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 100 (type: int), true (type: boolean) @@ -626,7 +626,7 @@ STAGE PLANS: alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((id = 100) and (id = 100) is not null) (type: boolean) + predicate: (id = 100) (type: boolean) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 100 (type: int), true (type: boolean) @@ -697,7 +697,7 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((dimid = 100) and (dimid = 100) is not null) (type: boolean) + predicate: (dimid = 100) (type: boolean) Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), val (type: string), val1 (type: string) @@ -715,7 +715,7 @@ STAGE PLANS: alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((id = 100) and (id = 100) is not null) (type: boolean) + predicate: (id = 100) (type: boolean) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 100 (type: int), true (type: boolean) @@ -788,7 +788,7 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((dimid = 100) and (dimid = 100) is not null) (type: boolean) + predicate: (dimid = 100) (type: boolean) Statistics: Num rows: 5 Data size: 
100 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: id (type: int), val (type: string), val1 (type: string)
@@ -806,7 +806,7 @@ STAGE PLANS:
                   alias: table3
                   Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((id = 100) and (id = 100) is not null) (type: boolean)
+                    predicate: (id = 100) (type: boolean)
                     Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: 100 (type: int), true (type: boolean)
diff --git a/ql/src/test/results/clientpositive/spark/join_merging.q.out b/ql/src/test/results/clientpositive/spark/join_merging.q.out
index fcf80bd..4499b92 100644
--- a/ql/src/test/results/clientpositive/spark/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_merging.q.out
@@ -138,7 +138,7 @@ STAGE PLANS:
                   alias: p1
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (p_size > 10) (type: boolean)
+                    predicate: ((p_size > 10) and p_partkey is not null) (type: boolean)
                     Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: p_partkey (type: int), p_size (type: int)
@@ -155,16 +155,19 @@ STAGE PLANS:
                 TableScan
                   alias: p2
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_partkey (type: int), p_size (type: int)
-                    outputColumnNames: _col0, _col1
+                  Filter Operator
+                    predicate: p_partkey is not null (type: boolean)
                     Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
+                    Select Operator
+                      expressions: p_partkey (type: int), p_size (type: int)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: int)
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
         Reducer 2
             Reduce Operator Tree:
              Join Operator
@@ -190,7 +193,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
diff --git a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out
index 2e1c74d..71e06bb 100644
--- a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out
@@ -35,18 +35,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         tag: 0
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -109,18 +109,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -231,38 +231,34 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean)
-                  Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
+                Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
 #### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col2,_col3
-                          columns.types string:string:string:string
-                          escape.delim \
-                          hive.serialization.extend.additional.nesting.levels true
-                          serialization.escape.crlf true
-                          serialization.format 1
-                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3
+                        columns.types string:string:string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
@@ -341,24 +337,24 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: string), value (type: string), ds (type: string)
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                         tag: 0
-                        value expressions: _col1 (type: string), _col2 (type: string)
+                        value expressions: _col1 (type: string)
                         auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
@@ -459,107 +455,9 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.srcpart
                   name: default.srcpart
-#### A masked pattern was here ####
-                Partition
-                  base file name: hr=11
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                    hr 11
-                  properties:
-                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-                    bucket_count -1
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 'default','default'
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.srcpart
-                    numFiles 1
-                    numRows 500
-                    partition_columns ds/hr
-                    partition_columns.types string:string
-                    rawDataSize 5312
-                    serialization.ddl struct srcpart { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 5812
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count -1
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 'default','default'
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.srcpart
-                      partition_columns ds/hr
-                      partition_columns.types string:string
-                      serialization.ddl struct srcpart { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.srcpart
-                  name: default.srcpart
-#### A masked pattern was here ####
-                Partition
-                  base file name: hr=12
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                    hr 12
-                  properties:
-                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-                    bucket_count -1
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 'default','default'
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.srcpart
-                    numFiles 1
-                    numRows 500
-                    partition_columns ds/hr
-                    partition_columns.types string:string
-                    rawDataSize 5312
-                    serialization.ddl struct srcpart { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 5812
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count -1
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 'default','default'
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.srcpart
-                      partition_columns ds/hr
-                      partition_columns.types string:string
-                      serialization.ddl struct srcpart { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.srcpart
-                  name: default.srcpart
             Truncated Path -> Alias:
               /srcpart/ds=2008-04-08/hr=11 [a]
               /srcpart/ds=2008-04-08/hr=12 [a]
-              /srcpart/ds=2008-04-09/hr=11 [a]
-              /srcpart/ds=2008-04-09/hr=12 [a]
         Map 3
             Map Operator Tree:
                 TableScan
@@ -568,18 +466,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -639,47 +537,38 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join0 to 1
-                filter mappings:
-                  0 [1, 1]
-                filter predicates:
-                  0 {(VALUE._col1 = '2008-04-08')}
-                  1 
+                     Inner Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col3, _col4
-                Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: ((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean)
-                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3
-                            columns.types string:string:string:string
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+                Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types string:string:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
@@ -699,8 +588,6 @@
 PREHOOK: Input: default@src
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: FROM srcpart a
@@ -714,8 +601,6 @@
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 17	val_17	17	val_17
 17	val_17	17	val_17
@@ -1072,18 +957,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                         tag: 0
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -1197,18 +1082,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -1268,42 +1153,38 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col3, _col4
-                Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: ((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean)
-                  Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3
-                            columns.types string:string:string:string
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+                Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types string:string:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/mergejoins.q.out b/ql/src/test/results/clientpositive/spark/mergejoins.q.out
index a3f8f8d..5b84a28 100644
--- a/ql/src/test/results/clientpositive/spark/mergejoins.q.out
+++ b/ql/src/test/results/clientpositive/spark/mergejoins.q.out
@@ -265,7 +265,7 @@ STAGE PLANS:
                      Left Outer Join1 to 2
                 filter predicates:
                   0 
-                  1 {(UDFToDouble(KEY.reducesinkkey0) < UDFToDouble(10))}
+                  1 {(UDFToDouble(KEY.reducesinkkey0) < 10.0)}
                   2 
                 keys:
                   0 _col0 (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out
index 2d0270b..497a29e 100644
--- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out
@@ -33,19 +33,23 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      null sort order: a
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      value expressions: _col1 (type: string)
-                      auto parallelism: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+                        tag: 0
+                        value expressions: _col1 (type: string)
+                        auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -101,21 +105,25 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
-                  Select Operator
-                    expressions: key (type: string), value (type: string), ds (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      null sort order: a
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      value expressions: _col1 (type: string), _col2 (type: string)
-                      auto parallelism: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                        tag: 1
+                        value expressions: _col1 (type: string)
+                        auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -215,149 +223,42 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.srcpart
                   name: default.srcpart
-#### A masked pattern was here ####
-                Partition
-                  base file name: hr=11
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                    hr 11
-                  properties:
-                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-                    bucket_count -1
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 'default','default'
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.srcpart
-                    numFiles 1
-                    numRows 500
-                    partition_columns ds/hr
-                    partition_columns.types string:string
-                    rawDataSize 5312
-                    serialization.ddl struct srcpart { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 5812
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count -1
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 'default','default'
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.srcpart
-                      partition_columns ds/hr
-                      partition_columns.types string:string
-                      serialization.ddl struct srcpart { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.srcpart
-                  name: default.srcpart
-#### A masked pattern was here ####
-                Partition
-                  base file name: hr=12
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                    hr 12
-                  properties:
-                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-                    bucket_count -1
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 'default','default'
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.srcpart
-                    numFiles 1
-                    numRows 500
-                    partition_columns ds/hr
-                    partition_columns.types string:string
-                    rawDataSize 5312
-                    serialization.ddl struct srcpart { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 5812
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count -1
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 'default','default'
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.srcpart
-                      partition_columns ds/hr
-                      partition_columns.types string:string
-                      serialization.ddl struct srcpart { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.srcpart
-                  name: default.srcpart
             Truncated Path -> Alias:
               /srcpart/ds=2008-04-08/hr=11 [b]
               /srcpart/ds=2008-04-08/hr=12 [b]
-              /srcpart/ds=2008-04-09/hr=11 [b]
-              /srcpart/ds=2008-04-09/hr=12 [b]
         Reducer 2
             Needs Tagging: true
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Outer Join 0 to 1
-                filter mappings:
-                  1 [0, 1]
-                filter predicates:
-                  0 
-                  1 {(VALUE._col1 = '2008-04-08')}
+                     Inner Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0) and (UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean)
-                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col2,_col3
-                          columns.types string:string:string:string
-                          escape.delim \
-                          hive.serialization.extend.additional.nesting.levels true
-                          serialization.escape.crlf true
-                          serialization.format 1
-                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
+                Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3
+                        columns.types string:string:string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
@@ -377,8 +278,6 @@
 PREHOOK: Input: default@src
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: FROM src a
@@ -392,8 +291,6 @@
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 17	val_17	17	val_17
 17	val_17	17	val_17
@@ -444,18 +341,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         tag: 0
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -518,18 +415,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-                    Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -640,38 +537,34 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Right Outer Join0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
-                  Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
+                Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
 #### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col2,_col3
-                          columns.types string:string:string:string
-                          escape.delim \
-                          hive.serialization.extend.additional.nesting.levels true
-                          serialization.escape.crlf true
-                          serialization.format 1
-                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3
+                        columns.types string:string:string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
index 1fc8232..7f60d98 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
@@ -33,17 +33,17 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
         Map 3
             Map Operator Tree:
@@ -51,38 +51,35 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
         Reducer 2
             Reduce Operator Tree:
              Join Operator
                condition map:
-                     Left Outer Join0 to 1
+                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean)
-                 Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -151,17 +148,17 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
         Map 3
             Map Operator Tree:
@@ -169,38 +166,35 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: string)
         Reducer 2
             Reduce Operator Tree:
              Join Operator
                condition map:
-                     Left Outer Join0 to 1
+                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean)
-                 Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
index 771609a..641af3c 100644
--- a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
@@ -35,18 +35,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         tag: 0
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -105,24 +105,24 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-                    Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: string), value (type: string), ds (type: string)
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
-                        value expressions: _col1 (type: string), _col2 (type: string)
+                        value expressions: _col1 (type: string)
                         auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
@@ -223,149 +223,42 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.srcpart
                   name: default.srcpart
-#### A masked pattern was here ####
-                Partition
-                  base file name: hr=11
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                    hr 11
-                  properties:
-                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-                    bucket_count -1
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 'default','default'
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.srcpart
-                    numFiles 1
-                    numRows 500
-                    partition_columns ds/hr
-                    partition_columns.types string:string
-                    rawDataSize 5312
-                    serialization.ddl struct srcpart { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 5812
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count -1
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 'default','default'
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.srcpart
-                      partition_columns ds/hr
-                      partition_columns.types string:string
-                      serialization.ddl struct srcpart { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.srcpart
-                  name: default.srcpart
-#### A masked pattern was here ####
-                Partition
-                  base file name: hr=12
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                    hr 12
-                  properties:
-                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
-                    bucket_count -1
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 'default','default'
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.srcpart
-                    numFiles 1
-                    numRows 500
-                    partition_columns ds/hr
-                    partition_columns.types string:string
-                    rawDataSize 5312
-                    serialization.ddl struct srcpart { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 5812
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count -1
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 'default','default'
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.srcpart
-                      partition_columns ds/hr
-                      partition_columns.types string:string
-                      serialization.ddl struct srcpart { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.srcpart
-                  name: default.srcpart
             Truncated Path -> Alias:
               /srcpart/ds=2008-04-08/hr=11 [b]
               /srcpart/ds=2008-04-08/hr=12 [b]
-              /srcpart/ds=2008-04-09/hr=11 [b]
-              /srcpart/ds=2008-04-09/hr=12 [b]
         Reducer 2
             Needs Tagging: true
             Reduce Operator Tree:
              Join Operator
                condition map:
-                     Right Outer Join0 to 1
-                filter mappings:
-                  1 [0, 1]
-                filter predicates:
-                  0 
-                  1 {(VALUE._col1 = '2008-04-08')}
+                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 isSamplingPred: false
-                 predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
-                 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   GlobalTableId: 0
+               Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 GlobalTableId: 0
 #### A masked pattern was here ####
-                   NumFilesPerFileSink: 1
-                   Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                 NumFilesPerFileSink: 1
+                 Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       properties:
-                         columns _col0,_col1,_col2,_col3
-                         columns.types string:string:string:string
-                         escape.delim \
-                         hive.serialization.extend.additional.nesting.levels true
-                         serialization.escape.crlf true
-                         serialization.format 1
-                         serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                   TotalFiles: 1
-                   GatherStats: false
-                   MultiFileSpray: false
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     properties:
+                       columns _col0,_col1,_col2,_col3
+                       columns.types string:string:string:string
+                       escape.delim \
+                       hive.serialization.extend.additional.nesting.levels true
+                       serialization.escape.crlf true
+                       serialization.format 1
+                       serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 TotalFiles: 1
+                 GatherStats: false
+                 MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
@@ -385,8 +278,6 @@
 PREHOOK: Input: default@src
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: FROM src a
@@ -400,8 +291,6 @@
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 17	val_17	17	val_17
 17	val_17	17	val_17
@@ -452,18 +341,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-                    Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                         tag: 0
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -577,18 +466,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -648,42 +537,38 @@ STAGE PLANS:
             Reduce Operator Tree:
              Join Operator
                condition map:
-                     Right Outer Join0 to 1
+                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col3, _col4
-               Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 isSamplingPred: false
-                 predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
-                 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                 Select Operator
-                   expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
-                   outputColumnNames: _col0, _col1, _col2, _col3
-                   Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                   File Output Operator
-                     compressed: false
-                     GlobalTableId: 0
-#### A masked pattern was here ####
-                     NumFilesPerFileSink: 1
-                     Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                     table:
-                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                         properties:
-                           columns _col0,_col1,_col2,_col3
-                           columns.types string:string:string:string
-                           escape.delim \
-                           hive.serialization.extend.additional.nesting.levels true
-                           serialization.escape.crlf true
-                           serialization.format 1
-                           serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                     TotalFiles: 1
-                     GatherStats: false
-                     MultiFileSpray: false
+               Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+                 outputColumnNames: _col0, _col1, _col2, _col3
+                 Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+                 File Output Operator
+                   compressed: false
+                   GlobalTableId: 0
+#### A masked pattern was here ####
+                   NumFilesPerFileSink: 1
+                   Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                   table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       properties:
+                         columns _col0,_col1,_col2,_col3
+                         columns.types string:string:string:string
+                         escape.delim \
+                         hive.serialization.extend.additional.nesting.levels true
+                         serialization.escape.crlf true
+                         serialization.format 1
+                         serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                   TotalFiles: 1
+                   GatherStats: false
+                   MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
@@ -766,18 +651,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                         tag: 0
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -840,18 +725,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
-                    Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                         tag: 1
                         value expressions: _col1 (type: string)
                         auto parallelism: false
@@ -962,38 +847,34 @@ STAGE PLANS:
             Reduce Operator Tree:
              Join Operator
                condition map:
-                     Right Outer Join0 to 1
+                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 isSamplingPred: false
-                 predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
-                 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   GlobalTableId: 0
+               Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 GlobalTableId: 0
 #### A masked pattern was here ####
-                   NumFilesPerFileSink: 1
-                   Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+                 NumFilesPerFileSink: 1
+                 Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       properties:
-                         columns _col0,_col1,_col2,_col3
-                         columns.types string:string:string:string
-                         escape.delim \
-                         hive.serialization.extend.additional.nesting.levels true
-                         serialization.escape.crlf true
-                         serialization.format 1
-                         serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                   TotalFiles: 1
-                   GatherStats: false
-                   MultiFileSpray: false
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     properties:
+                       columns _col0,_col1,_col2,_col3
+                       columns.types string:string:string:string
+                       escape.delim \
+                       hive.serialization.extend.additional.nesting.levels true
+                       serialization.escape.crlf true
+                       serialization.format 1
+                       serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 TotalFiles: 1
+                 GatherStats: false
+                 MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index 1901dba..8768b45 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -246,22 +246,22 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (value = value) (type: boolean)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: value (type: string)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         keys: _col0 (type: string)
                         mode: hash
                         outputColumnNames: _col0
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: string)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: string)
-                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
         Reducer 2
             Reduce Operator Tree:
              Join Operator
@@ -929,7 +929,7 @@ POSTHOOK: query: insert into t values(0)
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@t
 POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select * from t where exists (select count(*) from src where 1=2)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t where exists (select count(*) from src where 1=2)
@@ -943,7 +943,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1)
-        Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 4 <- Map 3 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -975,9 +975,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
-                          key expressions: true (type: boolean)
-                          sort order: +
-                          Map-reduce partition columns: true (type: boolean)
+                          sort order: 
                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col0 (type: bigint)
         Reducer 2
@@ -989,10 +987,10 @@ STAGE PLANS:
                  0 
                  1 
                outputColumnNames: _col0
-               Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1006,16 +1004,9 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        keys: true (type: boolean)
-                        mode: complete
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
-                        Select Operator
-                          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 
   Stage: Stage-0
@@ -1023,7 +1014,7 @@ STAGE PLANS:
     Processor Tree:
       ListSink
 
-Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: select * from t where exists (select count(*) from src where 1=2)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index 7f53d27..8ac5aa8 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -290,7 +290,7 @@ STAGE PLANS:
             Reduce Operator Tree:
              Join Operator
                condition map:
-                     Left Semi Join 0 to 1
+                     Inner Join 0 to 1
                keys:
                  0 UDFToDouble(_col1) (type: double)
                  1 _col0 (type: double)
@@ -356,16 +356,11 @@ STAGE PLANS:
                   Filter Operator
                     predicate: _col0 is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: double)
-                      mode: hash
-                      outputColumnNames: _col0
+                    Reduce Output Operator
+                      key expressions: _col0 (type: double)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: double)
                       Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: double)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: double)
-                        Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -419,10 +414,9 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
         Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2)
         Reducer 5 <- Reducer 4 (GROUP, 2)
-        Reducer 6 <- Reducer 5 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
        Map 1
@@ -529,32 +523,15 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string), _col1 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: int)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
-                    Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-        Reducer 6
-            Reduce Operator Tree:
-              Group By
Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -612,8 +589,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -638,16 +614,20 @@ STAGE PLANS: Filter Operator predicate: ((value = value) and (key > '9')) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -665,23 +645,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -932,10 +895,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 
2) Reducer 4 <- Reducer 3 (GROUP, 1) Reducer 7 <- Map 6 (GROUP, 2) - Reducer 8 <- Reducer 7 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -979,19 +941,19 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (l_partkey = l_partkey) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(l_quantity) keys: l_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct) Reducer 2 Reduce Operator Tree: @@ -1049,29 +1011,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: double), _col0 (type: int) - mode: hash + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: int) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1103,7 +1052,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1130,18 +1079,18 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_type (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data 
size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1165,21 +1114,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - mode: complete + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1517,22 +1461,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name = p_name) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_partkey (type: int) outputColumnNames: p_name, p_partkey - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_name (type: string), p_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1571,16 +1515,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2047,22 +1991,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_brand = p_brand) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_brand is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_brand (type: string) outputColumnNames: p_type, p_brand - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_type (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2086,12 +2030,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2552,18 +2496,18 @@ STAGE PLANS: alias: sc Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = key) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 
Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -2648,16 +2592,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 8 Reduce Operator Tree: Group By Operator @@ -3303,17 +3247,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: @@ -3436,17 +3380,17 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Map 5 Map Operator Tree: @@ -3454,17 +3398,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: 
boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: @@ -3475,10 +3419,10 @@ STAGE PLANS: 0 _col1 (type: string), _col2 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3492,21 +3436,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col4 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3589,17 +3533,17 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - 
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Map 5 Map Operator Tree: @@ -3607,17 +3551,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: @@ -3628,10 +3572,10 @@ STAGE PLANS: 0 _col1 (type: string), _col2 (type: string), _col3 (type: int) 1 _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3645,21 +3589,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3724,17 +3668,17 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_size = p_size) and p_type is not null) (type: boolean) - 
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: @@ -3833,10 +3777,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) Reducer 7 <- Map 6 (GROUP, 2) - Reducer 8 <- Reducer 7 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3860,37 +3803,37 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -3924,10 +3867,10 @@ STAGE PLANS: 0 _col4 (type: string), UDFToLong(_col5) (type: bigint) 1 _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3938,18 +3881,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 7 Reduce Operator Tree: @@ -3958,33 +3901,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: bigint) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4015,9 +3941,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (GROUP, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -4041,19 +3966,19 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column 
stats: NONE Filter Operator - predicate: (p_partkey = p_partkey) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) keys: p_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct) Reducer 2 Reduce Operator Tree: @@ -4079,33 +4004,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: double) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: double) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4160,11 +4068,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -4195,7 +4102,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -4223,10 +4130,10 @@ STAGE PLANS: 0 _col0 (type: int), _col5 (type: int) 1 _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8 - Statistics: Num rows: 30 Data size: 7485 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 61 Data size: 14971 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 7485 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 61 Data size: 14971 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4268,33 +4175,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 6805 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 6805 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Statistics: Num rows: 28 Data size: 6805 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE + Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -4337,10 +4227,9 @@ STAGE PLANS: Spark Edges: Reducer 10 <- Map 7 (GROUP, 2) - Reducer 11 <- Reducer 10 (GROUP, 2) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### @@ -4366,37 +4255,37 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 10 Reduce Operator Tree: @@ -4405,40 +4294,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 = _col0) (type: boolean) - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: bigint) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reducer 2 Reduce Operator Tree: @@ -4488,17 +4360,17 @@ STAGE PLANS: 0 _col4 (type: string), UDFToLong(_col5) (type: bigint) 1 _col1 (type: string), _col0 (type: bigint) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 - Statistics: Num rows: 9 Data size: 1345 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col5 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 5 Data size: 747 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 747 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 747 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4509,18 +4381,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 8 Reduce Operator Tree: @@ -4529,18 +4401,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 @@ -4593,10 +4465,9 @@ STAGE PLANS: Spark Edges: Reducer 10 <- Map 7 (GROUP, 2) - Reducer 11 <- Reducer 10 (GROUP, 2) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL 
SORT, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### @@ -4622,37 +4493,37 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey = p_partkey) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_partkey (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey = p_partkey) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) keys: p_partkey (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct) Reducer 10 Reduce Operator Tree: @@ -4661,40 +4532,23 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 363 
Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                     Filter Operator
-                      predicate: (_col0 = _col0) (type: boolean)
-                      Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+                      predicate: _col0 is not null (type: boolean)
+                      Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
                         expressions: _col0 (type: double), _col1 (type: int), true (type: boolean)
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col1 (type: int), _col0 (type: double)
                           sort order: ++
                           Map-reduce partition columns: _col1 (type: int), _col0 (type: double)
-                          Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col2 (type: boolean)
         Reducer 2
             Reduce Operator Tree:
@@ -4744,17 +4598,17 @@ STAGE PLANS:
                   0 _col0 (type: int), UDFToDouble(_col5) (type: double)
                   1 _col1 (type: int), _col0 (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16
-                Statistics: Num rows: 9 Data size: 1345 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col5 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean)
-                  Statistics: Num rows: 5 Data size: 747 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 5 Data size: 747 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 5 Data size: 747 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4765,18 +4619,18 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count()
                   keys: _col0 (type: int)
                   mode: complete
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
         Reducer 8
             Reduce Operator Tree:
@@ -4785,18 +4639,18 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count(), count(_col1)
                   keys: _col0 (type: int)
                   mode: complete
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint), _col2 (type: bigint)

   Stage: Stage-0
@@ -4897,7 +4751,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 UDFToLong(_col0) (type: bigint)
                   1 _col0 (type: bigint)
@@ -4920,16 +4774,11 @@ STAGE PLANS:
                   Filter Operator
                     predicate: _col0 is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      keys: _col0 (type: bigint)
-                      mode: hash
-                      outputColumnNames: _col0
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: bigint)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: bigint)
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE

   Stage: Stage-0
     Fetch Operator
@@ -5061,9 +4910,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
         Reducer 4 <- Map 3 (GROUP, 2)
-        Reducer 5 <- Reducer 4 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -5087,7 +4935,7 @@ STAGE PLANS:
                   alias: tt
                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (j = j) (type: boolean)
+                    predicate: j is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: j (type: int), i (type: int)
@@ -5130,9 +4978,8 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col1 (type: bigint), _col0 (type: int)
-                  mode: hash
+                Select Operator
+                  expressions: _col1 (type: bigint), _col0 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
@@ -5140,18 +4987,6 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int)
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-        Reducer 5
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: bigint), KEY._col1 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: bigint), _col1 (type: int)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int)
-                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE

   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index 714c21f..01ec85e 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -30517,18 +30517,18 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean)
-                    Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
+                    Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         GlobalTableId: 0
 #### A masked pattern was here ####
                         NumFilesPerFileSink: 1
-                        Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30545,7 +30545,6 @@ STAGE PLANS:
                         TotalFiles: 1
                         GatherStats: false
                         MultiFileSpray: false
-            Execution mode: vectorized
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
diff --git a/ql/src/test/results/clientpositive/subq_where_serialization.q.out b/ql/src/test/results/clientpositive/subq_where_serialization.q.out
index f689651..45ceeaa 100644
--- a/ql/src/test/results/clientpositive/subq_where_serialization.q.out
+++ b/ql/src/test/results/clientpositive/subq_where_serialization.q.out
@@ -3,67 +3,38 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain select src.key from src where src.key in ( select distinct key from src)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2 , consists of Stage-5, Stage-1
-  Stage-5 has a backup stage: Stage-1
-  Stage-3 depends on stages: Stage-5
-  Stage-1
-  Stage-0 depends on stages: Stage-3, Stage-1
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3

 STAGE PLANS:
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:src
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:src
           TableScan
             alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: key (type: string)
-                mode: hash
+              Select Operator
+                expressions: key (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Group By Operator
-          keys: KEY._col0 (type: string)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            keys: _col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Conditional Operator
-
-  Stage: Stage-5
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $INTNAME
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $INTNAME
-          TableScan
-            HashTable Sink Operator
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
+                  HashTable Sink Operator
+                    keys:
+                      0 _col0 (type: string)
+                      1 _col0 (type: string)

   Stage: Stage-3
     Map Reduce
@@ -96,47 +67,6 @@ STAGE PLANS:
       Local Work:
         Map Reduce Local Work

-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: src
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Left Semi Join 0 to 1
-          keys:
-            0 _col0 (type: string)
-            1 _col0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
       limit: -1
diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out
index b271529..cfc7652 100644
--- a/ql/src/test/results/clientpositive/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_exists.q.out
@@ -231,22 +231,22 @@ STAGE PLANS:
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (value = value) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: value is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: value (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   keys: _col0 (type: string)
                   mode: hash
                   outputColumnNames: _col0
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -915,7 +915,7 @@ POSTHOOK: query: insert into t values(0)
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@t
 POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select * from t where exists (select count(*) from src where 1=2)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t where exists (select count(*) from src where 1=2)
@@ -943,9 +943,7 @@ STAGE PLANS:
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
-                key expressions: true (type: boolean)
-                sort order: +
-                Map-reduce partition columns: true (type: boolean)
+                sort order:
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
@@ -956,19 +954,12 @@ STAGE PLANS:
           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
          Select Operator
            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-           Group By Operator
-             keys: true (type: boolean)
-             mode: complete
-             outputColumnNames: _col0
-             Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
-             Select Operator
-               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
-               File Output Operator
-                 compressed: false
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+           File Output Operator
+             compressed: false
+             table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-1
     Map Reduce
@@ -987,7 +978,7 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order:
-              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -996,10 +987,10 @@ STAGE PLANS:
             0
             1
           outputColumnNames: _col0
-          Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1011,7 +1002,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink

-Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select * from t where exists (select count(*) from src where 1=2)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out
index 7473928..e020a14 100644
--- a/ql/src/test/results/clientpositive/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out
@@ -328,7 +328,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
          condition map:
-              Left Semi Join 0 to 1
+              Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
@@ -377,17 +377,12 @@ STAGE PLANS:
            expressions: _col0 (type: string)
            outputColumnNames: _col0
            Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE
-           Group By Operator
-             keys: _col0 (type: string)
-             mode: hash
-             outputColumnNames: _col0
-             Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE
-             File Output Operator
-               compressed: false
-               table:
-                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+           File Output Operator
+             compressed: false
+             table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
@@ -474,7 +469,7 @@ STAGE PLANS:
           Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE
           Join Operator
            condition map:
-                Left Semi Join 0 to 1
+                Inner Join 0 to 1
            keys:
              0 _col0 (type: string)
              1 _col0 (type: string)
@@ -504,7 +499,7 @@ STAGE PLANS:
           Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE
           Join Operator
            condition map:
-                Left Semi Join 0 to 1
+                Inner Join 0 to 1
            keys:
              0 _col0 (type: string)
              1 _col0 (type: string)
@@ -1119,23 +1114,23 @@ STAGE PLANS:
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (value = value) (type: boolean)
-             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+             predicate: value is not null (type: boolean)
+             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
               keys: key (type: string), value (type: string)
               mode: hash
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string), _col1 (type: string)
                 sort order: ++
                 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Demux Operator
-          Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
           Mux Operator
-            Statistics: Num rows: 1125 Data size: 11952 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
            Join Operator
              condition map:
                   Inner Join 0 to 1
@@ -1165,9 +1160,9 @@ STAGE PLANS:
            keys: KEY._col0 (type: string), KEY._col1 (type: string)
            mode: mergepartial
            outputColumnNames: _col0, _col1
-           Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Mux Operator
-             Statistics: Num rows: 1125 Data size: 11952 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
             Join Operator
              condition map:
                   Inner Join 0 to 1
@@ -2157,7 +2152,7 @@ STAGE PLANS:
        Demux Operator
          Statistics: Num rows: 6 Data size: 1249 Basic stats: COMPLETE Column stats: NONE
          Mux Operator
-           Statistics: Num rows: 7 Data size: 1457 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 9 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
           Join Operator
            condition map:
                 Left Outer Join0 to 1
@@ -2190,14 +2185,14 @@ STAGE PLANS:
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 3 Data size: 624 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-           predicate: (_col0 = _col0) (type: boolean)
-           Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+           predicate: _col0 is not null (type: boolean)
+           Statistics: Num rows: 3 Data size: 624 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: bigint), _col1 (type: string), true (type: boolean)
             outputColumnNames: _col0, _col1, _col2
-             Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 3 Data size: 624 Basic stats: COMPLETE Column stats: NONE
             Mux Operator
-              Statistics: Num rows: 7 Data size: 1457 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 9 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
              Join Operator
                condition map:
                     Left Outer Join0 to 1
diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.out
index 375a48a..cf8863d 100644
--- a/ql/src/test/results/clientpositive/subquery_notin_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out
@@ -228,11 +228,10 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1, Stage-5
-  Stage-3 depends on stages: Stage-2, Stage-7
+  Stage-3 depends on stages: Stage-2, Stage-6
   Stage-4 is a root stage
   Stage-5 depends on stages: Stage-4
   Stage-6 is a root stage
-  Stage-7 depends on stages: Stage-6
   Stage-0 depends on stages: Stage-3

 STAGE PLANS:
@@ -320,7 +319,7 @@ STAGE PLANS:
                 key expressions: _col0 (type: string), _col1 (type: double)
                 sort order: ++
                 Map-reduce partition columns: _col0 (type: string), _col1 (type: double)
-                Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col2 (type: boolean)
       Reduce Operator Tree:
         Join Operator
@@ -455,46 +454,19 @@ STAGE PLANS:
            expressions: _col0 (type: string), _col1 (type: double)
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-           Group By Operator
-             keys: _col0 (type: string), _col1 (type: double)
-             mode: hash
-             outputColumnNames: _col0, _col1
+           Filter Operator
+             predicate: _col0 is not null (type: boolean)
             Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-7
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string), _col1 (type: double)
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: string), _col1 (type: double)
-              Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: double)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
-          Filter Operator
-            predicate: (_col0 = _col0) (type: boolean)
-            Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), _col1 (type: double), true (type: boolean)
-              outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+             Select Operator
+               expressions: _col0 (type: string), _col1 (type: double), true (type: boolean)
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
@@ -526,7 +498,7 @@ POSTHOOK: Input: default@part
 #### A masked pattern was here ####
 Manufacturer#1	1173.15
 Manufacturer#2	1690.68
-Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain
 select b.p_mfgr, min(p_retailprice)
 from part b
@@ -639,7 +611,7 @@ STAGE PLANS:
                 key expressions: _col0 (type: string)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean)
       Reduce Operator Tree:
         Join Operator
@@ -767,24 +739,15 @@ STAGE PLANS:
            predicate: ((_col1 - _col2) > 600.0) (type: boolean)
            Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
            Select Operator
-             expressions: _col0 (type: string)
-             outputColumnNames: _col0
+             expressions: _col0 (type: string), true (type: boolean)
+             outputColumnNames: _col0, _col1
             Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
-             Group By Operator
-               keys: _col0 (type: string)
-               mode: complete
-               outputColumnNames: _col0
-               Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-               Select Operator
-                 expressions: _col0 (type: string), true (type: boolean)
-                 outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+             File Output Operator
+               compressed: false
+               table:
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
@@ -792,7 +755,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink

-Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select b.p_mfgr, min(p_retailprice)
 from part b
 group by b.p_mfgr
@@ -1121,7 +1084,7 @@ STAGE PLANS:
                 key expressions: _col3 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col3 (type: int)
-                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 171 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col2 (type: boolean)
       Reduce Operator Tree:
         Join Operator
@@ -1156,19 +1119,19 @@ STAGE PLANS:
            alias: t2
            Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (c1 = c1) (type: boolean)
-             Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+             predicate: c1 is not null (type: boolean)
+             Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
               aggregations: count(), count(c1)
               keys: c1 (type: int)
               mode: hash
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
-                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: bigint), _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -1191,18 +1154,18 @@ STAGE PLANS:
            alias: t2
            Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (c1 = c1) (type: boolean)
-             Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+             predicate: c1 is not null (type: boolean)
+             Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
               keys: c1 (type: int)
               mode: hash
               outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
-                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
          keys: KEY._col0 (type: int)
@@ -1235,7 +1198,7 @@ STAGE PLANS:
              key expressions: _col0 (type: int)
              sort order: +
              Map-reduce partition columns: _col0 (type: int)
-              Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -1244,7 +1207,7 @@ STAGE PLANS:
            0 _col0 (type: int)
            1 _col0 (type: int)
          outputColumnNames: _col2, _col3
-          Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 2 Data size: 171 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
@@ -1278,17 +1241,12 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0
          Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
-         Group By Operator
-           keys: _col0 (type: int)
-           mode: complete
-           outputColumnNames: _col0
-           Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
index 94210fd..aa26dc5 100644
--- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
+++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
@@ -187,48 +187,10 @@ where b.key in
 )
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1

 STAGE PLANS:
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: src
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((value = value) and (key > '9')) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: key (type: string), value (type: string)
-                mode: hash
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: string)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            keys: _col0 (type: string), _col1 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
   Stage: Stage-1
     Map Reduce
       Map Operator Tree:
@@ -245,11 +207,25 @@ STAGE PLANS:
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string), _col1 (type: string)
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((value = value) and (key > '9')) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -441,7 +417,7 @@ STAGE PLANS:
                key expressions: _col0 (type: string), _col1 (type: string)
                sort order: ++
                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col2 (type: boolean)
       Reduce Operator Tree:
         Join Operator
@@ -544,12 +520,12 @@ STAGE PLANS:
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-           predicate: (_col0 = _col0) (type: boolean)
-           Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+           predicate: _col0 is not null (type: boolean)
+           Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string), true (type: boolean)
             outputColumnNames: _col0, _col1, _col2
-             Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              table:
diff --git a/ql/src/test/results/clientpositive/udf_between.q.out b/ql/src/test/results/clientpositive/udf_between.q.out
index 2bb7264..8070735 100644
--- a/ql/src/test/results/clientpositive/udf_between.q.out
+++ b/ql/src/test/results/clientpositive/udf_between.q.out
@@ -26,7 +26,7 @@ STAGE PLANS:
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (UDFToDouble(key) + 100.0) BETWEEN 100 AND 200 (type: boolean)
+             predicate: (UDFToDouble(key) + 100.0) BETWEEN 100.0 AND 200.0 (type: boolean)
             Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
@@ -81,7 +81,7 @@ STAGE PLANS:
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (not (UDFToDouble(key) + 100.0) BETWEEN 100 AND 200) (type: boolean)
+             predicate: (not (UDFToDouble(key) + 100.0) BETWEEN 100.0 AND 200.0) (type: boolean)
             Statistics: Num rows: 445 Data size: 4727 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
diff --git a/ql/src/test/results/clientpositive/vector_between_columns.q.out b/ql/src/test/results/clientpositive/vector_between_columns.q.out
index cf50a0f..48e9ef3 100644
--- a/ql/src/test/results/clientpositive/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/vector_between_columns.q.out
@@ -129,13 +129,13 @@ STAGE PLANS:
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-           expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string)
+           expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4
            Select Vectorization:
                className: VectorSelectOperator
                native: true
                projectedOutputColumns: [0, 2, 1, 3, 5]
-               selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> 5:string
+               selectExpressions: VectorUDFAdaptor(CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3, col 3) -> 4:boolean) -> 5:string
            Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
@@ -274,8 +274,8 @@ STAGE PLANS:
            Filter Vectorization:
                className: VectorFilterOperator
                native: true
-               predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:boolean) -> boolean
-           predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean)
+               predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3, col 3) -> 4:boolean) -> boolean
+           predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean)
            Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint)
diff --git a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
index d2a879d..84b9250 100644
--- a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
@@ -202,7 +202,7 @@ STAGE PLANS:
            alias: vectortab_b_1korc
            Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
+             predicate: (s is not null and dt is not null and ts is not null) (type: boolean)
             Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
@@ -226,8 +226,8 @@ STAGE PLANS:
            Filter Vectorization:
                className: VectorFilterOperator
                native: true
-               predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean
-           predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
+               predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean
+           predicate: (s is not null and dt is not null and ts is not null) (type: boolean)
            Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
diff --git a/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out b/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out
index d9591d0..da67386 100644
--- a/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out
+++ b/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out
@@ -18,7 +18,7 @@ STAGE PLANS:
            alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (cint) IN (ctinyint, cbigint) (type: boolean)
+             predicate: (cint) IN (UDFToInteger(ctinyint), UDFToInteger(cbigint)) (type: boolean)
             Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
@@ -35,7 +35,7 @@ STAGE PLANS:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. Column type is int but the common type is bigint
+          notVectorizedReason: Predicate expression for FILTER operator: Vectorizing IN expression only supported for constant values
          vectorized: false

   Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/vector_string_decimal.q.out b/ql/src/test/results/clientpositive/vector_string_decimal.q.out
index 3540635..9b3684c 100644
--- a/ql/src/test/results/clientpositive/vector_string_decimal.q.out
+++ b/ql/src/test/results/clientpositive/vector_string_decimal.q.out
@@ -59,26 +59,43 @@ STAGE PLANS:
           TableScan
             alias: orc_decimal
             Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Filter Operator
-              predicate: (id) IN ('100000000', '200000000') (type: boolean)
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterDoubleColumnInList(col 1, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 0) -> 1:double) -> boolean
+              predicate: (UDFToDouble(id)) IN (1.0E8, 2.0E8) (type: boolean)
               Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: id (type: decimal(18,0))
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. Column type is decimal(18,0) but the common type is string
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
     Fetch Operator