diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml
index ca5028d..9fe829d 100644
--- a/druid-handler/pom.xml
+++ b/druid-handler/pom.xml
@@ -212,6 +212,17 @@
       <groupId>org.apache.calcite</groupId>
       <artifactId>calcite-druid</artifactId>
       <version>${calcite.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.calcite.avatica</groupId>
+          <artifactId>avatica-core</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.calcite.avatica</groupId>
+      <artifactId>avatica</artifactId>
+      <version>${avatica.version}</version>
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
index 0b35428..be374af 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
@@ -170,7 +170,11 @@ private static String createSelectStarQuery(String dataSource) throws IOExceptio
     // Create Select query
     SelectQueryBuilder builder = new Druids.SelectQueryBuilder();
     builder.dataSource(dataSource);
-    builder.intervals(Arrays.asList(DruidTable.DEFAULT_INTERVAL));
+    final List<Interval> intervals = Arrays.asList(
+        new Interval(DruidTable.DEFAULT_INTERVAL.getStartMillis(),
+            DruidTable.DEFAULT_INTERVAL.getEndMillis(),
+            ISOChronology.getInstanceUTC()));
+    builder.intervals(intervals);
     builder.pagingSpec(PagingSpec.newSpec(1));
     Map<String, Object> context = new HashMap<>();
     context.put(Constants.DRUID_QUERY_FETCH, false);
@@ -413,11 +417,15 @@ private static String createSelectStarQuery(String dataSource) throws IOExceptio
   private static List<List<Interval>> createSplitsIntervals(List<Interval> intervals, int numSplits
   ) {
-    final long totalTime = DruidDateTimeUtils.extractTotalTime(intervals);
+    long startTime = intervals.get(0).getStartMillis();
     long endTime = startTime;
     long currTime = 0;
     List<List<Interval>> newIntervals = new ArrayList<>();
+    long totalTime = 0;
+    for (Interval interval: intervals) {
+      totalTime += interval.getEndMillis() - interval.getStartMillis();
+    }
     for (int i = 0, posIntervals = 0; i < numSplits; i++) {
       final long rangeSize = Math.round((double) (totalTime * (i + 1)) / numSplits) -
           Math.round((double) (totalTime * i) / numSplits);
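The summation that replaces `DruidDateTimeUtils.extractTotalTime` pairs with the rounding in the existing loop: rounding the cumulative boundary at each step guarantees the per-split sizes add up to exactly `totalTime`. A minimal, self-contained sketch of that arithmetic (class and method names here are illustrative, not part of the patch):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.joda.time.Interval;
import org.joda.time.chrono.ISOChronology;

public class SplitArithmetic {

  // Sums the time covered by the intervals, then carves it into numSplits
  // near-equal ranges; rounding cumulative boundaries (rather than a fixed
  // chunk size) means the sizes always sum to exactly totalTime.
  static List<Long> splitSizes(List<Interval> intervals, int numSplits) {
    long totalTime = 0;
    for (Interval interval : intervals) {
      totalTime += interval.getEndMillis() - interval.getStartMillis();
    }
    List<Long> sizes = new ArrayList<>();
    for (int i = 0; i < numSplits; i++) {
      sizes.add(Math.round((double) (totalTime * (i + 1)) / numSplits)
          - Math.round((double) (totalTime * i) / numSplits));
    }
    return sizes;
  }

  public static void main(String[] args) {
    // One UTC day split seven ways: the sizes differ by at most 1 ms.
    Interval day = new Interval(0L, 86400000L, ISOChronology.getInstanceUTC());
    System.out.println(splitSizes(Arrays.asList(day), 7));
  }
}
```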
diff --git a/pom.xml b/pom.xml
index 3ddec7a..3431dfd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,10 +112,10 @@
     3.5.2
     1.5.6
     0.1
-    <avatica.version>1.8.0</avatica.version>
+    <avatica.version>1.10.0-SNAPSHOT</avatica.version>
     1.7.7
     0.8.0.RELEASE
-    <calcite.version>1.10.0</calcite.version>
+    <calcite.version>1.12.0-SNAPSHOT</calcite.version>
     4.2.1
     4.1.6
     4.1.7
diff --git a/ql/pom.xml b/ql/pom.xml
index 7db0ede..c541538 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -379,12 +379,22 @@
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-core</artifactId>
+      <exclusion>
+        <groupId>org.apache.calcite.avatica</groupId>
+        <artifactId>avatica-core</artifactId>
+      </exclusion>
       <groupId>org.apache.calcite</groupId>
       <artifactId>calcite-druid</artifactId>
       <version>${calcite.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.calcite.avatica</groupId>
+          <artifactId>avatica-core</artifactId>
+        </exclusion>
+      </exclusions>
       <groupId>org.apache.calcite.avatica</groupId>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 1d78b4c..b121eea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -34,6 +34,7 @@
 import org.apache.calcite.adapter.druid.DruidQuery;
 import org.apache.calcite.adapter.druid.DruidSchema;
 import org.apache.calcite.adapter.druid.DruidTable;
+import org.apache.calcite.adapter.druid.LocalInterval;
 import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptMaterialization;
@@ -310,7 +311,7 @@ private static RelNode createTableScan(Table viewTable) {
       }
       metrics.add(field.getName());
     }
-    List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
+    List<LocalInterval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
     DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
         dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
index 9a65de3..d0b1757 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
+import org.apache.calcite.config.CalciteConnectionConfig;
 import org.apache.calcite.plan.Context;
 import org.apache.calcite.rel.RelNode;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
@@ -27,11 +28,14 @@ public class HivePlannerContext implements Context {
   private HiveAlgorithmsConf algoConfig;
   private HiveRulesRegistry registry;
+  private CalciteConnectionConfig calciteConfig;
   private Set<RelNode> corrScalarRexSQWithAgg;
 
-  public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry, Set<RelNode> corrScalarRexSQWithAgg) {
+  public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry,
+      CalciteConnectionConfig calciteConfig, Set<RelNode> corrScalarRexSQWithAgg) {
     this.algoConfig = algoConfig;
     this.registry = registry;
+    this.calciteConfig = calciteConfig;
     // this is to keep track if a subquery is correlated and contains aggregate
     // this is computed in CalcitePlanner while planning and is later required by subuery remove rule
     // hence this is passed using HivePlannerContext
@@ -45,6 +49,9 @@ public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry regis
     if (clazz.isInstance(registry)) {
       return clazz.cast(registry);
     }
+    if (clazz.isInstance(calciteConfig)) {
+      return clazz.cast(calciteConfig);
+    }
     if(clazz.isInstance(corrScalarRexSQWithAgg)) {
       return clazz.cast(corrScalarRexSQWithAgg);
     }
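`HivePlannerContext` implements Calcite's `Context`, and its `unwrap` contract is what makes the new field reachable: a planner rule asks the context for a typed facet and gets back whichever registered object is an instance of the requested class, presumably so the upgraded Druid rules can read connection-level settings. A self-contained sketch of that lookup pattern (the anonymous context below is illustrative, not Hive's class):

```java
import java.util.Properties;

import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.config.CalciteConnectionConfigImpl;
import org.apache.calcite.plan.Context;

public class UnwrapDemo {
  public static void main(String[] args) {
    // Default connection config, exactly as CalcitePlanner builds it below.
    final CalciteConnectionConfig config = new CalciteConnectionConfigImpl(new Properties());

    // A context that serves the config by class, mirroring the unwrap()
    // branch the patch adds to HivePlannerContext.
    Context ctx = new Context() {
      @Override
      public <T> T unwrap(Class<T> clazz) {
        if (clazz.isInstance(config)) {
          return clazz.cast(config);
        }
        return null;
      }
    };

    // Callers request the facet they need and null-check the result.
    System.out.println(ctx.unwrap(CalciteConnectionConfig.class) != null);
  }
}
```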
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
index 4edc4df..0b94b8a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
@@ -24,6 +24,7 @@
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.type.OperandTypes;
 import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlTypeTransforms;
 
 import com.google.common.collect.Sets;
 
@@ -42,9 +43,10 @@
       Sets.newHashSet(YEAR, QUARTER, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND);
 
   private HiveExtractDate(String name) {
-    super(name, SqlKind.EXTRACT, ReturnTypes.INTEGER_NULLABLE, null,
-        OperandTypes.INTERVALINTERVAL_INTERVALDATETIME,
-        SqlFunctionCategory.SYSTEM);
+    super(name, SqlKind.EXTRACT,
+        ReturnTypes.cascade(ReturnTypes.INTEGER, SqlTypeTransforms.FORCE_NULLABLE), null,
+        OperandTypes.INTERVALINTERVAL_INTERVALDATETIME,
+        SqlFunctionCategory.SYSTEM);
   }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
index 38d7906..81de33f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
@@ -21,6 +21,7 @@
 import java.util.List;
 
 import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptMaterializations;
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
@@ -77,7 +78,7 @@ protected void apply(RelOptRuleCall call, Project project, Filter filter, TableS
       // Costing is done in transformTo(), so we call it repeatedly with all applicable
       // materialized views and cheapest one will be picked
       List<RelOptMaterialization> applicableMaterializations =
-          VolcanoPlanner.getApplicableMaterializations(root, materializations);
+          RelOptMaterializations.getApplicableMaterializations(root, materializations);
       for (RelOptMaterialization materialization : applicableMaterializations) {
         List<RelNode> subs = new MaterializedViewSubstitutionVisitor(
             materialization.queryRel, root, relBuilderFactory).go(materialization.tableRel);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
index 69e157e..9bcdd0c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
@@ -165,7 +165,7 @@ public RelOptPredicateList getPredicates(Project project, RelMetadataQuery mq) {
             rexBuilder.makeInputRef(project, expr.i), expr.e));
       }
     }
-    return RelOptPredicateList.of(projectPullUpPredicates);
+    return RelOptPredicateList.of(rexBuilder, projectPullUpPredicates);
   }
 
   /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}.
   */
@@ -202,6 +202,7 @@ public RelOptPredicateList getPredicates(Aggregate agg, RelMetadataQuery mq) {
     final RelNode input = agg.getInput();
     final RelOptPredicateList inputInfo = mq.getPulledUpPredicates(input);
     final List<RexNode> aggPullUpPredicates = new ArrayList<>();
+    final RexBuilder rexBuilder = agg.getCluster().getRexBuilder();
 
     ImmutableBitSet groupKeys = agg.getGroupSet();
     Mapping m = Mappings.create(MappingType.PARTIAL_FUNCTION,
@@ -219,7 +220,7 @@ public RelOptPredicateList getPredicates(Aggregate agg, RelMetadataQuery mq) {
         aggPullUpPredicates.add(r);
       }
     }
-    return RelOptPredicateList.of(aggPullUpPredicates);
+    return RelOptPredicateList.of(rexBuilder, aggPullUpPredicates);
   }
 
   /**
@@ -271,7 +272,7 @@ public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) {
     if (!disjPred.isAlwaysTrue()) {
       preds.add(disjPred);
     }
-    return RelOptPredicateList.of(preds);
+    return RelOptPredicateList.of(rB, preds);
   }
 
   /**
@@ -411,6 +412,7 @@ public RelOptPredicateList inferPredicates(
       final JoinRelType joinType = joinRel.getJoinType();
       final List<RexNode> leftPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(leftChildPredicates));
       final List<RexNode> rightPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(rightChildPredicates));
+      final RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
       switch (joinType) {
       case INNER:
       case LEFT:
@@ -476,13 +478,13 @@ public RelOptPredicateList inferPredicates(
         pulledUpPredicates = Iterables.concat(leftPreds, rightPreds,
             RelOptUtil.conjunctions(joinRel.getCondition()), inferredPredicates);
       }
-      return RelOptPredicateList.of(
+      return RelOptPredicateList.of(rexBuilder,
           pulledUpPredicates, leftInferredPredicates, rightInferredPredicates);
     case LEFT:
-      return RelOptPredicateList.of(
+      return RelOptPredicateList.of(rexBuilder,
           leftPreds, EMPTY_LIST, rightInferredPredicates);
     case RIGHT:
-      return RelOptPredicateList.of(
+      return RelOptPredicateList.of(rexBuilder,
           rightPreds, leftInferredPredicates, EMPTY_LIST);
     default:
       assert inferredPredicates.size() == 0;
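Every `HiveRelMdPredicates` change above is the same mechanical API migration: newer Calcite's `RelOptPredicateList.of` takes a `RexBuilder` as its first argument so the list can construct derived expressions. A standalone sketch of the new call shape (assuming the upgraded Calcite on the classpath; the class name is illustrative):

```java
import java.util.Collections;

import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.plan.RelOptPredicateList;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;

public class PredicateListShape {
  public static void main(String[] args) {
    // Build a trivial predicate list the way the patched methods now do:
    // the RexBuilder goes first, followed by the pulled-up predicates.
    RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
    RexNode alwaysTrue = rexBuilder.makeLiteral(true);
    RelOptPredicateList preds =
        RelOptPredicateList.of(rexBuilder, Collections.singletonList(alwaysTrue));
    System.out.println(preds.pulledUpPredicates);
  }
}
```

This is why each patched method now pulls a `RexBuilder` off the relational node's cluster before returning.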
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index 0dc0c24..79b6522 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -37,6 +37,8 @@
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
 import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
 
 public class ASTBuilder {
 
@@ -269,19 +271,21 @@ public static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral)
       type = ((Boolean) val).booleanValue() ? HiveParser.KW_TRUE : HiveParser.KW_FALSE;
       break;
     case DATE: {
-      val = literal.getValue();
+      final Calendar c = (Calendar) literal.getValue();
+      final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
       type = HiveParser.TOK_DATELITERAL;
       DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
-      val = df.format(((Calendar) val).getTime());
+      val = df.format(dt.withZoneRetainFields(DateTimeZone.getDefault()).toDate());
       val = "'" + val + "'";
     }
       break;
     case TIME:
     case TIMESTAMP: {
-      val = literal.getValue();
+      final Calendar c = (Calendar) literal.getValue();
+      final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
       type = HiveParser.TOK_TIMESTAMPLITERAL;
       DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
-      val = df.format(((Calendar) val).getTime());
+      val = df.format(dt.withZoneRetainFields(DateTimeZone.getDefault()).toDate());
       val = "'" + val + "'";
     }
       break;
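The `withZoneRetainFields` step is the crux of these literal fixes: the Calendar Calcite hands back is pinned to a specific zone (GMT in practice), and formatting it directly with `SimpleDateFormat` would shift the wall-clock fields into the JVM's default zone. A small sketch of the trick in isolation (values are illustrative):

```java
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.TimeZone;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class LiteralFormatDemo {
  public static void main(String[] args) {
    // A GMT calendar standing in for what RexLiteral.getValue() returns.
    Calendar c = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
    c.clear();
    c.set(2014, Calendar.JANUARY, 1, 0, 0, 0);

    // Re-stamp the same wall-clock fields onto the JVM default zone before
    // formatting, so the printed text matches the fields in the Calendar.
    DateTime dt = new DateTime(c.getTimeInMillis(),
        DateTimeZone.forTimeZone(c.getTimeZone()));
    DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    System.out.println(df.format(
        dt.withZoneRetainFields(DateTimeZone.getDefault()).toDate()));
    // Prints 2014-01-01 00:00:00.000 regardless of the JVM default zone.
  }
}
```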
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 27990a2..165f8c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -24,7 +24,6 @@
 import java.util.Map;
 
 import org.apache.calcite.adapter.druid.DruidQuery;
-import org.apache.calcite.avatica.util.TimeUnitRange;
 import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelVisitor;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
index e840938..b1efbbd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
 
 import java.math.BigDecimal;
-import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Calendar;
@@ -75,6 +74,8 @@
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -291,16 +292,17 @@ public ExprNodeDesc visitLiteral(RexLiteral literal) {
       case DOUBLE:
         return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo,
             Double.valueOf(((Number) literal.getValue3()).doubleValue()));
-      case DATE:
+      case DATE: {
+        final Calendar c = (Calendar) literal.getValue();
         return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo,
-            new Date(((Calendar)literal.getValue()).getTimeInMillis()));
+            new java.sql.Date(c.getTimeInMillis()));
+      }
       case TIME:
       case TIMESTAMP: {
-        Object value = literal.getValue3();
-        if (value instanceof Long) {
-          value = new Timestamp((Long)value);
-        }
-        return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value);
+        final Calendar c = (Calendar) literal.getValue();
+        final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
+        return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo,
+            new Timestamp(dt.getMillis()));
       }
       case BINARY:
         return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3());
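The same idea in the Calcite-to-Hive direction: instead of reinterpreting `getValue3()`'s long as a local-zone value, the patched code reads the Calendar (zone included) and keeps the instant. A sketch of that round trip (values illustrative):

```java
import java.sql.Timestamp;
import java.util.Calendar;
import java.util.TimeZone;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class LiteralToHiveTimestamp {
  public static void main(String[] args) {
    // A GMT calendar standing in for RexLiteral.getValue() on a TIMESTAMP.
    Calendar c = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
    c.clear();
    c.set(2014, Calendar.JANUARY, 1, 12, 30, 0);

    // The DateTime wrapper records the calendar's zone but leaves the
    // instant untouched, so dt.getMillis() == c.getTimeInMillis(): the
    // constant preserves its instant rather than being rebuilt from a
    // zone-less long, as the removed branch did.
    DateTime dt = new DateTime(c.getTimeInMillis(),
        DateTimeZone.forTimeZone(c.getTimeZone()));
    System.out.println(new Timestamp(dt.getMillis()));
  }
}
```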
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index a05b89c..26c3bc8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -23,11 +23,12 @@
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
-import java.util.GregorianCalendar;
 import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
+import org.apache.calcite.avatica.util.DateTimeUtils;
 import org.apache.calcite.avatica.util.TimeUnit;
 import org.apache.calcite.avatica.util.TimeUnitRange;
 import org.apache.calcite.plan.RelOptCluster;
@@ -38,8 +39,8 @@
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlCollation;
 import org.apache.calcite.sql.SqlIntervalQualifier;
 import org.apache.calcite.sql.SqlKind;
@@ -96,6 +97,8 @@
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
@@ -634,20 +637,22 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx
         calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
         break;
       case DATE:
-        Calendar cal = new GregorianCalendar();
-        cal.setTime((Date) value);
-        calciteLiteral = rexBuilder.makeDateLiteral(cal);
-        break;
-      case TIMESTAMP:
-        Calendar c = null;
-        if (value instanceof Calendar) {
-          c = (Calendar)value;
-        } else {
-          c = Calendar.getInstance();
-          c.setTimeInMillis(((Timestamp)value).getTime());
-        }
-        calciteLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED);
-        break;
+        final Calendar cal = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
+        cal.setTime((Date) value);
+        calciteLiteral = rexBuilder.makeDateLiteral(cal);
+        break;
+      case TIMESTAMP:
+        final Calendar calt = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
+        if (value instanceof Calendar) {
+          final Calendar c = (Calendar) value;
+          long timeMs = c.getTimeInMillis();
+          calt.setTimeInMillis(timeMs - c.getTimeZone().getOffset(timeMs));
+        } else {
+          final Timestamp ts = (Timestamp) value;
+          calt.setTimeInMillis(ts.getTime() - (ts.getTimezoneOffset() * 60 * 1000));
+        }
+        calciteLiteral = rexBuilder.makeTimestampLiteral(calt, RelDataType.PRECISION_NOT_SPECIFIED);
+        break;
       case INTERVAL_YEAR_MONTH:
         // Calcite year-month literal value is months as BigDecimal
         BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths());
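`RexNodeConverter` handles the opposite direction, handing Hive constants to Calcite, which in the newer version interprets timestamp literals against GMT. The patch therefore shifts the local wall-clock into a GMT-pinned calendar. A sketch of just the offset arithmetic (`Timestamp.getTimezoneOffset` is deprecated, but it is exactly what the hunk above relies on):

```java
import java.sql.Timestamp;
import java.util.Calendar;
import java.util.Locale;

import org.apache.calcite.avatica.util.DateTimeUtils;

public class TimestampLiteralDemo {
  public static void main(String[] args) {
    // A Hive timestamp constant: wall-clock time in the JVM default zone.
    Timestamp ts = Timestamp.valueOf("2014-01-01 12:30:00");

    // getTimezoneOffset() returns minutes to add to local time to reach UTC,
    // so subtracting it moves the instant such that a GMT calendar shows the
    // same wall-clock fields the user wrote.
    Calendar calt = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
    calt.setTimeInMillis(ts.getTime() - (ts.getTimezoneOffset() * 60 * 1000));

    // Prints 12:30 in any JVM default zone.
    System.out.println(calt.get(Calendar.HOUR_OF_DAY) + ":" + calt.get(Calendar.MINUTE));
  }
}
```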
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 21bf020..ed3d526 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -38,6 +38,7 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -50,10 +51,12 @@
 import org.apache.calcite.adapter.druid.DruidRules;
 import org.apache.calcite.adapter.druid.DruidSchema;
 import org.apache.calcite.adapter.druid.DruidTable;
+import org.apache.calcite.adapter.druid.LocalInterval;
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.config.CalciteConnectionConfigImpl;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptMaterialization;
 import org.apache.calcite.plan.RelOptPlanner;
-import org.apache.calcite.plan.RelOptPlanner.Executor;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptSchema;
 import org.apache.calcite.plan.RelOptUtil;
@@ -93,6 +96,7 @@
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rel.type.RelDataTypeImpl;
 import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexExecutor;
 import org.apache.calcite.rex.RexFieldCollation;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
@@ -1298,7 +1302,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
           conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
       HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
       HiveRulesRegistry registry = new HiveRulesRegistry();
-      HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, corrScalarRexSQWithAgg);
+      CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(new Properties());
+      HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig,
+          corrScalarRexSQWithAgg);
       RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
       final RexBuilder rexBuilder = cluster.getRexBuilder();
       final RelOptCluster optCluster = RelOptCluster.create(planner, rexBuilder);
@@ -1323,10 +1329,16 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       }
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation");
 
+      // Create executor
+      RexExecutor executorProvider = new HiveRexExecutorImpl(optCluster);
+      calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider);
+
       // We need to get the ColumnAccessInfo and viewToTableSchema for views.
       HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
           HiveRelFactories.HIVE_BUILDER.create(optCluster, null), this.columnAccessInfo,
           this.viewProjectToTableSchema);
+
+      //basePlan.getCluster().getPlanner().setExecutor(executorProvider);
       fieldTrimmer.trim(calciteGenPlan);
 
       // Create and set MD provider
@@ -1334,9 +1346,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       RelMetadataQuery.THREAD_PROVIDERS.set(
           JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
 
-      // Create executor
-      Executor executorProvider = new HiveRexExecutorImpl(optCluster);
-
       //Remove subquery
       LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
       calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
@@ -1558,7 +1567,7 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
    * executor
    * @return
    */
-  private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, Executor executorProvider) {
+  private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
     // TODO: Decorelation of subquery should be done before attempting
     // Partition Pruning; otherwise Expression evaluation may try to execute
     // corelated sub query.
@@ -1576,13 +1585,13 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
         "Calcite: HiveProjectOverIntersectRemoveRule and HiveIntersectMerge rules");
 
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+    basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
        HiveIntersectRewriteRule.INSTANCE);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
        "Calcite: HiveIntersectRewrite rule");
 
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+    basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
        HiveExceptRewriteRule.INSTANCE);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
        "Calcite: HiveExceptRewrite rule");
@@ -1595,7 +1604,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
       // Its not clear, if this rewrite is always performant on MR, since extra map phase
       // introduced for 2nd MR job may offset gains of this multi-stage aggregation.
       // We need a cost model for MR to enable this on MR.
-      basePlan = hepPlan(basePlan, true, mdProvider, null, HiveExpandDistinctAggregatesRule.INSTANCE);
+      basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
           "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite");
     }
@@ -1606,7 +1615,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
     // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or
     // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.ARBITRARY,
+    basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.ARBITRARY,
         new HivePreFilteringRule(maxCNFNodeCount));
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
         "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF");
@@ -1661,10 +1670,10 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
           HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE);
       final long reductionTuples = HiveConf.getLongVar(conf,
           HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES);
-      basePlan = hepPlan(basePlan, true, mdProvider, null, HiveSortMergeRule.INSTANCE,
+      basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveSortMergeRule.INSTANCE,
           HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE,
           HiveSortUnionReduceRule.INSTANCE);
-      basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+      basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
           new HiveSortRemoveRule(reductionProportion, reductionTuples),
           HiveProjectSortTransposeRule.INSTANCE);
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
@@ -1673,14 +1682,14 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
 
     // 5. Push Down Semi Joins
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, true, mdProvider, null, SemiJoinJoinTransposeRule.INSTANCE,
+    basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE,
         SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
         "Calcite: Prejoin ordering transformation, Push Down Semi Joins");
 
     // 6. Apply Partition Pruning
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null, new HivePartitionPruneRule(conf));
+    basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, new HivePartitionPruneRule(conf));
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
         "Calcite: Prejoin ordering transformation, Partition Pruning");
@@ -1695,7 +1704,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
 
     // 8.
    // Merge, remove and reduce Project if possible
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null,
+    basePlan = hepPlan(basePlan, false, mdProvider, executorProvider,
         HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
         "Calcite: Prejoin ordering transformation, Merge Project-Project");
@@ -1705,7 +1714,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
     // storage (incase there are filters on non partition cols). This only
     // matches FIL-PROJ-TS
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, true, mdProvider, null,
+    basePlan = hepPlan(basePlan, true, mdProvider, executorProvider,
         HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
         HiveProjectFilterPullUpConstantsRule.INSTANCE);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
@@ -1725,7 +1734,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
    * @return optimized RelNode
    */
   private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
-      RelMetadataProvider mdProvider, Executor executorProvider, RelOptRule... rules) {
+      RelMetadataProvider mdProvider, RexExecutor executorProvider, RelOptRule... rules) {
     return hepPlan(basePlan, followPlanChanges, mdProvider, executorProvider,
         HepMatchOrder.TOP_DOWN, rules);
   }
@@ -1742,7 +1751,7 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
    * @return optimized RelNode
    */
   private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
-      RelMetadataProvider mdProvider, Executor executorProvider, HepMatchOrder order,
+      RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order,
       RelOptRule...
 rules) {
 
     RelNode optimizedRelNode = basePlan;
@@ -1769,6 +1778,7 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
 
     if (executorProvider != null) {
       basePlan.getCluster().getPlanner().setExecutor(executorProvider);
+      planner.setExecutor(executorProvider);
     }
 
     planner.setRoot(basePlan);
@@ -2248,7 +2258,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
         }
         metrics.add(field.getName());
       }
-      List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
+      List<LocalInterval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
       DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
           dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
@@ -4143,5 +4153,4 @@ private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
     DRUID,
     NATIVE
   }
-
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
index 7229cc7..4823950 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
@@ -61,7 +61,7 @@ public void testRuleFiredOnlyOnce() {
 
     // Create rules registry to not trigger a rule more than once
     HiveRulesRegistry registry = new HiveRulesRegistry();
-    HivePlannerContext context = new HivePlannerContext(null, registry, null);
+    HivePlannerContext context = new HivePlannerContext(null, registry, null, null);
     HepPlanner planner = new HepPlanner(programBuilder.build(), context);
 
     // Cluster
diff --git a/ql/src/test/results/clientpositive/constprog2.q.out b/ql/src/test/results/clientpositive/constprog2.q.out
index 7bfd0cf..cbc5fd2 100644
--- a/ql/src/test/results/clientpositive/constprog2.q.out
+++ b/ql/src/test/results/clientpositive/constprog2.q.out
@@ -67,6 +67,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT src1.key, src1.key + 1, src2.value
        FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND cast(src1.key as double) = 86
@@ -90,28 +91,22 @@ STAGE PLANS:
             predicate: (UDFToDouble(key) = 86.0) (type: boolean)
             Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: int)
-              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: int)
+                sort order: 
                 Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
           TableScan
             alias: src2
             Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (UDFToDouble(key) = 86.0) (type: boolean)
+              predicate: (86 = key) (type: boolean)
               Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
+                expressions: value (type: string)
+                outputColumnNames: _col1
                 Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
+                  sort order: 
                   Statistics: Num rows: 500 Data size: 5301
Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: @@ -119,17 +114,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col2 + Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), (_col0 + 1) (type: int), _col2 (type: string) + expressions: 86 (type: int), 87 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/druid_basic2.q.out b/ql/src/test/results/clientpositive/druid_basic2.q.out index bc9410b..d54d0a5 100644 --- a/ql/src/test/results/clientpositive/druid_basic2.q.out +++ b/ql/src/test/results/clientpositive/druid_basic2.q.out @@ -77,7 +77,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -104,7 +104,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":["delta"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":[],"metrics":["delta"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -135,7 +135,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -166,7 +166,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -212,7 +212,7 @@ STAGE PLANS: alias: druid_table_1 filterExpr: language is not null (type: boolean) properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -237,7 +237,7 @@ STAGE PLANS: alias: druid_table_1 filterExpr: language is not null (type: boolean) properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -274,7 +274,7 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from 
deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select #### A masked pattern was here #### name default.druid_table_1 @@ -300,7 +300,7 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select #### A masked pattern was here #### name default.druid_table_1 @@ -399,7 +399,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false @@ -414,7 +414,7 @@ STAGE PLANS: 
TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -442,7 +442,7 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select #### A masked pattern was here #### name default.druid_table_1 @@ -468,7 +468,7 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select #### A masked pattern was here #### name default.druid_table_1 @@ -545,8 +545,7 @@ LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on 
stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -554,27 +553,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: druid_table_1 + properties: + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Select Operator - expressions: robot (type: string), language (type: string), __time (type: timestamp), added (type: float), delta (type: float) - outputColumnNames: robot, language, __time, added, delta + expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: max(added), sum(delta) - keys: robot (type: string), language (type: string), floor_day(__time) (type: timestamp) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int), _col2 (type: bigint) + null sort order: az + sort order: +- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp) - null sort order: aaa - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - tag: -1 - value expressions: _col3 (type: float), _col4 (type: double) - auto parallelism: false + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: timestamp), _col3 (type: float) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -592,6 +589,8 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -616,6 +615,8 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -634,81 +635,8 @@ STAGE PLANS: /druid_table_1 [druid_table_1] Needs Tagging: false Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: float), _col4 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,timestamp,float,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: UDFToInteger(_col0) (type: int), _col2 (type: float) - null sort order: az - sort order: +- - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: timestamp), _col3 (type: double) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,timestamp,float,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,timestamp,float,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: double) + expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: bigint), VALUE._col2 
(type: float) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Limit @@ -726,7 +654,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types string:timestamp:float:double + columns.types string:timestamp:bigint:float escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/druid_timeseries.q.out b/ql/src/test/results/clientpositive/druid_timeseries.q.out index 6b2ffe9..372927b 100644 --- a/ql/src/test/results/clientpositive/druid_timeseries.q.out +++ b/ql/src/test/results/clientpositive/druid_timeseries.q.out @@ -79,7 +79,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"longMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"longMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -109,7 +109,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"NONE","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"none","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -139,7 +139,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"YEAR","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"year","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -169,7 +169,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"QUARTER","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"quarter","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -199,7 +199,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"MONTH","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"month","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -229,7 +229,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"WEEK","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"week","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -259,7 +259,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"DAY","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"day","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -289,7 +289,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"HOUR","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"hour","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -319,7 +319,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"MINUTE","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"minute","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -349,7 +349,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"SECOND","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"second","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -381,7 +381,7 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"HOUR","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"hour","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -418,7 +418,7 @@ STAGE PLANS: alias: druid_table_1 filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean) 
properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -495,7 +495,7 @@ STAGE PLANS: alias: druid_table_1 filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean) properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator diff --git a/ql/src/test/results/clientpositive/druid_topn.q.out b/ql/src/test/results/clientpositive/druid_topn.q.out index 57d6586..9ef03b4 100644 --- a/ql/src/test/results/clientpositive/druid_topn.q.out +++ b/ql/src/test/results/clientpositive/druid_topn.q.out @@ -85,8 +85,8 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"all","dimension":"robot","metric":"$f1","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100} - druid.query.type topN + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f1","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]} + druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: robot (type: string), $f1 (type: bigint), $f2 (type: float) @@ -109,24 +109,54 @@ ORDER BY s DESC LIMIT 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator 
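Note: three recurring changes in the timeseries expectations above: granularities are now spelled in lower case ("day", "month", ...), timeseries queries carry "context":{"skipEmptyBuckets":true} so Druid does not emit rows for empty time buckets, and select queries' pagingSpec gains "fromNext":true. A self-contained Jackson sketch of the new blocks (field names taken from the expected JSON above; the builder code is illustrative, not Hive's):

    import java.util.LinkedHashMap;
    import java.util.Map;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class DruidQueryJsonDemo {
      public static void main(String[] args) throws Exception {
        Map<String, Object> query = new LinkedHashMap<>();
        query.put("queryType", "timeseries");
        query.put("dataSource", "wikipedia");
        query.put("granularity", "all");               // lower-case spelling
        Map<String, Object> context = new LinkedHashMap<>();
        context.put("skipEmptyBuckets", true);         // drop empty buckets
        query.put("context", context);
        // pagingSpec belongs to the select queries, printed here for reference.
        Map<String, Object> pagingSpec = new LinkedHashMap<>();
        pagingSpec.put("threshold", 16384);
        pagingSpec.put("fromNext", true);
        ObjectMapper mapper = new ObjectMapper();
        System.out.println(mapper.writeValueAsString(query));
        System.out.println(mapper.writeValueAsString(pagingSpec));
      }
    }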
diff --git a/ql/src/test/results/clientpositive/druid_topn.q.out b/ql/src/test/results/clientpositive/druid_topn.q.out
index 57d6586..9ef03b4 100644
--- a/ql/src/test/results/clientpositive/druid_topn.q.out
+++ b/ql/src/test/results/clientpositive/druid_topn.q.out
@@ -85,8 +85,8 @@ STAGE PLANS:
          TableScan
            alias: druid_table_1
            properties:
-              druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"all","dimension":"robot","metric":"$f1","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100}
-              druid.query.type topN
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f1","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: robot (type: string), $f1 (type: bigint), $f2 (type: float)
@@ -109,24 +109,54 @@ ORDER BY s DESC LIMIT 100
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"none","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: __time (type: timestamp), robot (type: string), $f2 (type: bigint), $f3 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col3 (type: float)
+                sort order: -
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: timestamp), VALUE._col1 (type: string), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 100
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: string), _col0 (type: timestamp), _col2 (type: bigint), _col3 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"NONE","dimension":"robot","metric":"$f3","aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100}
-            druid.query.type topN
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink
 PREHOOK: query: EXPLAIN
 SELECT robot, floor_year(`__time`), max(added), sum(variation) as s
@@ -143,24 +173,50 @@ ORDER BY s DESC LIMIT 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"year","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col3 (type: float)
+                sort order: -
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: string), _col1 (type: timestamp), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
-      limit: -1
+      limit: 10
       Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"YEAR","dimension":"robot","metric":"$f3","aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":10}
-            druid.query.type topN
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink
 PREHOOK: query: EXPLAIN
 SELECT robot, floor_month(`__time`), max(added), sum(variation) as s
@@ -177,24 +233,50 @@ ORDER BY s LIMIT 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col3 (type: float)
+                sort order: +
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: string), _col1 (type: timestamp), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
-      limit: -1
+      limit: 10
       Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending"}]},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-            druid.query.type groupBy
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink
 PREHOOK: query: EXPLAIN
 SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s
@@ -211,24 +293,54 @@ ORDER BY s DESC, m DESC LIMIT 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot","namespace"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
+              outputColumnNames: _col0, _col2, _col3, _col4
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col4 (type: float), _col3 (type: bigint)
+                sort order: --
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: string), _col2 (type: timestamp)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col2 (type: timestamp), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: bigint), _col4 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot","namespace"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending"},{"dimension":"$f3","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-            druid.query.type groupBy
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink
 PREHOOK: query: EXPLAIN
 SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s
@@ -245,24 +357,54 @@ ORDER BY robot ASC, m DESC LIMIT 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot","namespace"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+              druid.query.type groupBy
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
+              outputColumnNames: _col0, _col2, _col3, _col4
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col3 (type: bigint)
+                sort order: +-
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col2 (type: timestamp), _col4 (type: float)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: bigint), VALUE._col2 (type: float)
+          outputColumnNames: _col0, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: bigint), _col4 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot","namespace"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending"},{"dimension":"$f3","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-            druid.query.type groupBy
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink
 PREHOOK: query: EXPLAIN
 SELECT robot, floor_year(`__time`), max(added), sum(variation) as s
@@ -281,24 +423,54 @@ ORDER BY s LIMIT 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1

 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"year","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
+              druid.query.type timeseries
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: __time (type: timestamp), $f1_0 (type: bigint), $f2 (type: float)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: float)
+                sort order: +
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: timestamp), _col1 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: timestamp), VALUE._col1 (type: bigint), KEY.reducesinkkey0 (type: float)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: '1' (type: string), _col0 (type: timestamp), _col1 (type: bigint), _col2 (type: float)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"YEAR","dimensions":[],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f2","direction":"ascending"}]},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-            druid.query.type groupBy
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: '1' (type: string), __time (type: timestamp), $f1_0 (type: bigint), $f2 (type: float)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink
 PREHOOK: query: EXPLAIN
 SELECT robot, floor_hour(`__time`), max(added) as m, sum(variation)
@@ -333,7 +505,7 @@ STAGE PLANS:
            alias: druid_table_1
            filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean)
            properties:
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
              druid.query.type select
            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Filter Operator
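Note: in the topN expectations above, the ORDER BY/LIMIT now travels inside a groupBy "limitSpec" instead of topN's "metric"/"threshold" pair; where the limitSpec stays {"type":"default"} with no limit, the sort and LIMIT are instead performed by the follow-up Map Reduce stage that the new plans add. A sketch of the limitSpec from the first hunk (illustrative construction, not Hive's code):

    import java.util.Collections;
    import java.util.LinkedHashMap;
    import java.util.Map;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class LimitSpecDemo {
      public static void main(String[] args) throws Exception {
        Map<String, Object> column = new LinkedHashMap<>();
        column.put("dimension", "$f1");
        column.put("direction", "descending");
        Map<String, Object> limitSpec = new LinkedHashMap<>();
        limitSpec.put("type", "default");
        limitSpec.put("limit", 100);
        limitSpec.put("columns", Collections.singletonList(column));
        // Prints: {"type":"default","limit":100,"columns":[{"dimension":"$f1","direction":"descending"}]}
        System.out.println(new ObjectMapper().writeValueAsString(limitSpec));
      }
    }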
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index 8c6f0f1..01d60f7 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -321,7 +321,7 @@ STAGE PLANS:
          outputColumnNames: _col0, _col1, _col2, _col4
          Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-            predicate: (((_col2 + _col4) > 2.0) or ((_col1 + 1) > 2)) (type: boolean)
+            predicate: (((_col2 + _col4) > 2) or ((_col1 + 1) > 2)) (type: boolean)
            Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: string)
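Note: in the hunk above only the rendering of the comparison literal changes (2 rather than 2.0); numeric promotion at the comparison makes the two predicates equivalent, so no results change. A trivial check (illustrative):

    public class LiteralPromotionDemo {
      public static void main(String[] args) {
        double v = 1.5 + 0.6;
        // The int literal is promoted to double at the comparison,
        // so both renderings of the predicate agree.
        System.out.println((v > 2) == (v > 2.0)); // true
      }
    }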
diff --git a/ql/src/test/results/clientpositive/join_merging.q.out b/ql/src/test/results/clientpositive/join_merging.q.out
index 79a7023..ef9ca18 100644
--- a/ql/src/test/results/clientpositive/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/join_merging.q.out
@@ -109,7 +109,7 @@ STAGE PLANS:
            alias: p1
            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-              predicate: (p_size > 10) (type: boolean)
+              predicate: ((p_size > 10) and p_partkey is not null) (type: boolean)
              Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: p_partkey (type: int), p_size (type: int)
@@ -124,20 +124,23 @@ STAGE PLANS:
          TableScan
            alias: p2
            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: p_partkey (type: int), p_size (type: int)
-              outputColumnNames: _col0, _col1
+            Filter Operator
+              predicate: p_partkey is not null (type: boolean)
              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: int)
+              Select Operator
+                expressions: p_partkey (type: int), p_size (type: int)
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: int)
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int)
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Left Outer Join0 to 1
+               Inner Join 0 to 1
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)
diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
index a867bd2..841ef14 100644
--- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
@@ -1044,7 +1044,7 @@ STAGE PLANS:
            alias: a
            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-              predicate: (key + 1) is not null (type: boolean)
+              predicate: key is not null (type: boolean)
              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: (key + 1) (type: int)
@@ -1063,7 +1063,7 @@ STAGE PLANS:
            alias: a
            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-              predicate: (key + 1) is not null (type: boolean)
+              predicate: key is not null (type: boolean)
              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: (key + 1) (type: int)
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
index c08a534..b69d0bd 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
@@ -1261,7 +1261,7 @@ STAGE PLANS:
            alias: a
            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-              predicate: (key + 1) is not null (type: boolean)
+              predicate: key is not null (type: boolean)
              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: (key + 1) (type: int)
@@ -1293,7 +1293,7 @@ STAGE PLANS:
            alias: a
            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-              predicate: (key + 1) is not null (type: boolean)
+              predicate: key is not null (type: boolean)
              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: (key + 1) (type: int)
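Note: the join_merging change (Left Outer Join rewritten to Inner Join, with p_partkey is not null pushed into both scans) and the (key + 1) is not null → key is not null rewrites above rest on the same null reasoning: "+" is strict (NULL iff an operand is NULL), and a null-rejecting condition on the outer side's column lets the planner drop the outer join. A small sketch of the strictness equivalence (illustrative, not Hive's rule code):

    public class StrictnessDemo {
      // NULL-propagating addition, standing in for SQL's key + 1.
      static Integer plus(Integer a, Integer b) {
        return (a == null || b == null) ? null : a + b;
      }
      public static void main(String[] args) {
        Integer[] keys = {null, 3, 7};
        for (Integer key : keys) {
          boolean composite  = plus(key, 1) != null; // (key + 1) is not null
          boolean simplified = key != null;          // key is not null
          System.out.println(key + " -> " + (composite == simplified)); // always true
        }
      }
    }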
diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
index 98d2328..4bdb186 100644
--- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
@@ -274,7 +274,7 @@ Stage-0
        Select Operator [SEL_5] (rows=2 width=3)
          Output:["_col0","_col1"]
          Filter Operator [FIL_16] (rows=2 width=3)
-            predicate:((id = 100) and (id = 100) is not null)
+            predicate:(id = 100)
            TableScan [TS_3] (rows=5 width=3)
              default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -311,7 +311,7 @@ Stage-0
        Select Operator [SEL_2] (rows=5 width=20)
          Output:["_col0","_col1","_col2"]
          Filter Operator [FIL_15] (rows=5 width=20)
-            predicate:((dimid = 100) and (dimid = 100) is not null)
+            predicate:(dimid = 100)
            TableScan [TS_0] (rows=10 width=20)
              default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
      <-Map 3 [SIMPLE_EDGE] llap
@@ -322,7 +322,7 @@ Stage-0
        Select Operator [SEL_5] (rows=2 width=3)
          Output:["_col0","_col1"]
          Filter Operator [FIL_16] (rows=2 width=3)
-            predicate:((id = 100) and (id = 100) is not null)
+            predicate:(id = 100)
            TableScan [TS_3] (rows=5 width=3)
              default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -372,7 +372,7 @@ Stage-0
        Select Operator [SEL_5] (rows=2 width=3)
          Output:["_col0","_col1"]
          Filter Operator [FIL_16] (rows=2 width=3)
-            predicate:((id = 100) and (id = 100) is not null)
+            predicate:(id = 100)
            TableScan [TS_3] (rows=5 width=3)
              default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -409,7 +409,7 @@ Stage-0
        Select Operator [SEL_2] (rows=5 width=20)
          Output:["_col0","_col1","_col2"]
          Filter Operator [FIL_15] (rows=5 width=20)
-            predicate:((dimid = 100) and (dimid = 100) is not null)
+            predicate:(dimid = 100)
            TableScan [TS_0] (rows=10 width=20)
              default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
      <-Map 3 [SIMPLE_EDGE] llap
@@ -420,7 +420,7 @@ Stage-0
        Select Operator [SEL_5] (rows=2 width=3)
          Output:["_col0","_col1"]
          Filter Operator [FIL_16] (rows=2 width=3)
-            predicate:((id = 100) and (id = 100) is not null)
+            predicate:(id = 100)
            TableScan [TS_3] (rows=5 width=3)
              default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -459,7 +459,7 @@ Stage-0
        Select Operator [SEL_2] (rows=5 width=20)
          Output:["_col0","_col1","_col2"]
          Filter Operator [FIL_15] (rows=5 width=20)
-            predicate:((dimid = 100) and (dimid = 100) is not null)
+            predicate:(dimid = 100)
            TableScan [TS_0] (rows=10 width=20)
              default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
      <-Map 3 [SIMPLE_EDGE] llap
@@ -470,7 +470,7 @@ Stage-0
        Select Operator [SEL_5] (rows=2 width=3)
          Output:["_col0","_col1"]
          Filter Operator [FIL_16] (rows=2 width=3)
-            predicate:((id = 100) and (id = 100) is not null)
+            predicate:(id = 100)
            TableScan [TS_3] (rows=5 width=3)
              default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
index 53c728b..54a864a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
@@ -208,8 +208,8 @@ STAGE PLANS:
                  Filter Vectorization:
                      className: VectorFilterOperator
                      native: true
-                      predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean
-                      predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
+                      predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean
+                      predicate: (s is not null and dt is not null and ts is not null) (type: boolean)
                  Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
@@ -274,8 +274,8 @@ STAGE PLANS:
                  Filter Vectorization:
                      className: VectorFilterOperator
                      native: true
-                      predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean
-                      predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
+                      predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean
+                      predicate: (s is not null and dt is not null and ts is not null) (type: boolean)
                  Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
diff --git a/ql/src/test/results/clientpositive/mergejoins.q.out b/ql/src/test/results/clientpositive/mergejoins.q.out
index 1023f61..4a290a1 100644
--- a/ql/src/test/results/clientpositive/mergejoins.q.out
+++ b/ql/src/test/results/clientpositive/mergejoins.q.out
@@ -251,7 +251,7 @@ STAGE PLANS:
               Left Outer Join1 to 2
          filter predicates:
            0
-            1 {(UDFToDouble(KEY.reducesinkkey0) < UDFToDouble(10))}
+            1 {(UDFToDouble(KEY.reducesinkkey0) < 10.0)}
            2
          keys:
            0 _col0 (type: string)
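Note: in the mergejoins hunk just above, UDFToDouble(10) is folded to the literal 10.0, so the constant is cast once at planning time rather than per row; the column-side cast remains. A trivial illustration:

    public class ConstantFoldDemo {
      public static void main(String[] args) {
        // What the optimizer does once at plan time:
        double folded = (double) 10;   // UDFToDouble(10) -> 10.0
        String key = "3";
        // Per-row work keeps only the column-side cast.
        System.out.println(Double.parseDouble(key) < folded);
      }
    }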
diff --git a/ql/src/test/results/clientpositive/perf/query15.q.out b/ql/src/test/results/clientpositive/perf/query15.q.out
index 4f4dcc5..a331cd7 100644
--- a/ql/src/test/results/clientpositive/perf/query15.q.out
+++ b/ql/src/test/results/clientpositive/perf/query15.q.out
@@ -5,83 +5,77 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.

 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)

 Stage-0
   Fetch Operator
     limit:100
     Stage-1
-      Reducer 6
-      File Output Operator [FS_31]
-        Limit [LIM_30] (rows=100 width=135)
+      Reducer 5
+      File Output Operator [FS_30]
+        Limit [LIM_29] (rows=100 width=135)
          Number of rows:100
-          Select Operator [SEL_29] (rows=174233858 width=135)
+          Select Operator [SEL_28] (rows=174233858 width=135)
            Output:["_col0","_col1"]
-          <-Reducer 5 [SIMPLE_EDGE]
-            SHUFFLE [RS_28]
-              Group By Operator [GBY_26] (rows=174233858 width=135)
+          <-Reducer 4 [SIMPLE_EDGE]
+            SHUFFLE [RS_27]
+              Group By Operator [GBY_25] (rows=174233858 width=135)
                Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
-              <-Reducer 4 [SIMPLE_EDGE]
-                SHUFFLE [RS_25]
+              <-Reducer 3 [SIMPLE_EDGE]
+                SHUFFLE [RS_24]
                  PartitionCols:_col0
-                  Group By Operator [GBY_24] (rows=348467716 width=135)
-                    Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7
-                    Select Operator [SEL_23] (rows=348467716 width=135)
-                      Output:["_col7","_col2"]
-                      Merge Join Operator [MERGEJOIN_47] (rows=348467716 width=135)
-                        Conds:RS_20._col0=RS_21._col0(Inner),Output:["_col2","_col7"]
-                      <-Map 9 [SIMPLE_EDGE]
-                        SHUFFLE [RS_21]
-                          PartitionCols:_col0
-                          Select Operator [SEL_19] (rows=18262 width=1119)
-                            Output:["_col0"]
-                            Filter Operator [FIL_44] (rows=18262 width=1119)
-                              predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null)
-                              TableScan [TS_17] (rows=73049 width=1119)
-                                default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
-                      <-Reducer 3 [SIMPLE_EDGE]
-                        SHUFFLE [RS_20]
-                          PartitionCols:_col0
-                          Select Operator [SEL_16] (rows=316788826 width=135)
-                            Output:["_col0","_col2","_col7"]
-                            Filter Operator [FIL_15] (rows=316788826 width=135)
-                              predicate:((substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or (_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500))
-                              Merge Join Operator [MERGEJOIN_46] (rows=316788826 width=135)
-                                Conds:RS_12._col0=RS_13._col1(Inner),Output:["_col3","_col4","_col5","_col7"]
-                              <-Map 8 [SIMPLE_EDGE]
-                                SHUFFLE [RS_13]
-                                  PartitionCols:_col1
-                                  Select Operator [SEL_8] (rows=287989836 width=135)
-                                    Output:["_col0","_col1","_col2"]
-                                    Filter Operator [FIL_43] (rows=287989836 width=135)
-                                      predicate:(cs_bill_customer_sk is not null and cs_sold_date_sk is not null)
-                                      TableScan [TS_6] (rows=287989836 width=135)
-                                        default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"]
-                              <-Reducer 2 [SIMPLE_EDGE]
-                                SHUFFLE [RS_12]
-                                  PartitionCols:_col0
-                                  Merge Join Operator [MERGEJOIN_45] (rows=88000001 width=860)
-                                    Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col3","_col4"]
-                                  <-Map 1 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_9]
-                                      PartitionCols:_col1
-                                      Select Operator [SEL_2] (rows=80000000 width=860)
-                                        Output:["_col0","_col1"]
-                                        Filter Operator [FIL_41] (rows=80000000 width=860)
-                                          predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
-                                          TableScan [TS_0] (rows=80000000 width=860)
-                                            default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
-                                  <-Map 7 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_10]
-                                      PartitionCols:_col0
-                                      Select Operator [SEL_5] (rows=40000000 width=1014)
-                                        Output:["_col0","_col1","_col2"]
-                                        Filter Operator [FIL_42] (rows=40000000 width=1014)
-                                          predicate:ca_address_sk is not null
-                                          TableScan [TS_3] (rows=40000000 width=1014)
-                                            default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_zip"]
+                  Group By Operator [GBY_23] (rows=348467716 width=135)
+                    Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col4
+                    Merge Join Operator [MERGEJOIN_45] (rows=348467716 width=135)
+                      Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col4","_col7"]
+                    <-Reducer 2 [SIMPLE_EDGE]
+                      SHUFFLE [RS_19]
+                        PartitionCols:_col0
+                        Merge Join Operator [MERGEJOIN_43] (rows=88000001 width=860)
+                          Conds:RS_16._col1=RS_17._col0(Inner),Output:["_col0","_col4"]
+                        <-Map 1 [SIMPLE_EDGE]
+                          SHUFFLE [RS_16]
+                            PartitionCols:_col1
+                            Select Operator [SEL_2] (rows=80000000 width=860)
+                              Output:["_col0","_col1"]
+                              Filter Operator [FIL_39] (rows=80000000 width=860)
+                                predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
+                                TableScan [TS_0] (rows=80000000 width=860)
+                                  default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
+                        <-Map 6 [SIMPLE_EDGE]
+                          SHUFFLE [RS_17]
+                            PartitionCols:_col0
+                            Select Operator [SEL_5] (rows=40000000 width=1014)
+                              Output:["_col0","_col2"]
+                              Filter Operator [FIL_40] (rows=40000000 width=1014)
+                                predicate:(((substr(ca_zip, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or (ca_state) IN ('CA', 'WA', 'GA')) and ca_address_sk is not null)
+                                TableScan [TS_3] (rows=40000000 width=1014)
+                                  default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_zip"]
+                    <-Reducer 8 [SIMPLE_EDGE]
+                      SHUFFLE [RS_20]
+                        PartitionCols:_col1
+                        Merge Join Operator [MERGEJOIN_44] (rows=316788826 width=135)
+                          Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2"]
+                        <-Map 7 [SIMPLE_EDGE]
+                          SHUFFLE [RS_12]
+                            PartitionCols:_col0
+                            Select Operator [SEL_8] (rows=287989836 width=135)
+                              Output:["_col0","_col1","_col2"]
+                              Filter Operator [FIL_41] (rows=287989836 width=135)
+                                predicate:(cs_bill_customer_sk is not null and cs_sold_date_sk is not null)
+                                TableScan [TS_6] (rows=287989836 width=135)
+                                  default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"]
+                        <-Map 9 [SIMPLE_EDGE]
+                          SHUFFLE [RS_13]
+                            PartitionCols:_col0
+                            Select Operator [SEL_11] (rows=18262 width=1119)
+                              Output:["_col0"]
+                              Filter Operator [FIL_42] (rows=18262 width=1119)
+                                predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null)
+                                TableScan [TS_9] (rows=73049 width=1119)
+                                  default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/query23.q.out
index 85cee23..a04e5cd 100644
--- a/ql/src/test/results/clientpositive/perf/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/query23.q.out
@@ -1,7 +1,5 @@
-Warning: Shuffle Join MERGEJOIN[379][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product
-Warning: Shuffle Join MERGEJOIN[380][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product
-Warning: Shuffle Join MERGEJOIN[382][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 51' is a cross product
-Warning: Shuffle Join MERGEJOIN[383][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 52' is a cross product
+Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 19' is a cross product
+Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 49' is a cross product
 PREHOOK: query: explain with frequent_ss_items as
 (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from store_sales
@@ -107,42 +105,38 @@ Plan optimized by CBO.

 Vertex dependency in root stage
 Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
 Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
-Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE)
-Reducer 16 <- Map 23 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
 Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
 Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE)
-Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 28 (CUSTOM_SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE)
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 33 (CUSTOM_SIMPLE_EDGE)
-Reducer 21 <- Reducer 20 (SIMPLE_EDGE)
-Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE)
-Reducer 26 <- Map 30 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE)
-Reducer 27 <- Reducer 26 (SIMPLE_EDGE)
-Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE)
+Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE)
+Reducer 24 <- Map 28 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
+Reducer 25 <- Reducer 24 (SIMPLE_EDGE)
+Reducer 26 <- Reducer 25 (CUSTOM_SIMPLE_EDGE)
 Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE)
-Reducer 33 <- Reducer 32 (SIMPLE_EDGE)
-Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE)
-Reducer 37 <- Reducer 36 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE)
-Reducer 38 <- Reducer 37 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Reducer 4 <- Reducer 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE)
-Reducer 42 <- Map 45 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE)
-Reducer 43 <- Reducer 42 (SIMPLE_EDGE)
-Reducer 47 <- Map 46 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE)
-Reducer 48 <- Map 55 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE)
-Reducer 49 <- Reducer 48 (SIMPLE_EDGE)
-Reducer 50 <- Reducer 49 (CUSTOM_SIMPLE_EDGE)
-Reducer 51 <- Reducer 50 (CUSTOM_SIMPLE_EDGE), Reducer 60 (CUSTOM_SIMPLE_EDGE)
-Reducer 52 <- Reducer 51 (CUSTOM_SIMPLE_EDGE), Reducer 65 (CUSTOM_SIMPLE_EDGE)
-Reducer 53 <- Reducer 52 (SIMPLE_EDGE)
-Reducer 57 <- Map 56 (SIMPLE_EDGE), Map 61 (SIMPLE_EDGE)
-Reducer 58 <- Map 62 (SIMPLE_EDGE), Reducer 57 (SIMPLE_EDGE)
-Reducer 59 <- Reducer 58 (SIMPLE_EDGE)
+Reducer 30 <- Map 29 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE)
+Reducer 31 <- Reducer 30 (SIMPLE_EDGE)
+Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE)
+Reducer 35 <- Reducer 34 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE)
+Reducer 36 <- Reducer 35 (SIMPLE_EDGE), Reducer 49 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 40 <- Map 43 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE)
+Reducer 41 <- Reducer 40 (SIMPLE_EDGE)
+Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE)
+Reducer 46 <- Map 51 (SIMPLE_EDGE), Reducer 45 (SIMPLE_EDGE)
+Reducer 47 <- Reducer 46 (SIMPLE_EDGE)
+Reducer 48 <- Reducer 47 (CUSTOM_SIMPLE_EDGE)
+Reducer 49 <- Reducer 48 (CUSTOM_SIMPLE_EDGE), Reducer 56 (CUSTOM_SIMPLE_EDGE), Reducer 61 (CUSTOM_SIMPLE_EDGE)
+Reducer 53 <- Map 52 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE)
+Reducer 54 <- Map 58 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE)
+Reducer 55 <- Reducer 54 (SIMPLE_EDGE)
+Reducer 56 <- Reducer 55 (CUSTOM_SIMPLE_EDGE)
 Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE)
-Reducer 60 <- Reducer 59 (CUSTOM_SIMPLE_EDGE)
-Reducer 64 <- Map 63 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE)
-Reducer 65 <- Reducer 64 (SIMPLE_EDGE)
+Reducer 60 <- Map 59 (SIMPLE_EDGE), Map 62 (SIMPLE_EDGE)
+Reducer 61 <- Reducer 60 (SIMPLE_EDGE)
 Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)

 Stage-0
@@ -150,435 +144,413 @@ Stage-0
     limit:100
     Stage-1
       Reducer 6
-      File Output Operator [FS_258]
-        Limit [LIM_257] (rows=1 width=112)
+      File Output Operator [FS_246]
+        Limit [LIM_245] (rows=1 width=112)
          Number of rows:100
-          Group By Operator [GBY_255] (rows=1 width=112)
+          Group By Operator [GBY_243] (rows=1 width=112)
            Output:["_col0"],aggregations:["sum(VALUE._col0)"]
          <-Union 5 [CUSTOM_SIMPLE_EDGE]
-            <-Reducer 38 [CONTAINS]
-              Reduce Output Operator [RS_254]
-                Group By Operator [GBY_253] (rows=1 width=112)
+            <-Reducer 36 [CONTAINS]
+              Reduce Output Operator [RS_242]
+                Group By Operator [GBY_241] (rows=1 width=112)
                  Output:["_col0"],aggregations:["sum(_col0)"]
-                  Select Operator [SEL_249] (rows=191667562 width=135)
+                  Select Operator [SEL_237] (rows=191667562 width=135)
                    Output:["_col0"]
-                    Merge Join Operator [MERGEJOIN_384] (rows=191667562 width=135)
-                      Conds:RS_246._col2=RS_247._col0(Inner),Output:["_col3","_col4"]
-                    <-Reducer 37 [SIMPLE_EDGE]
-                      SHUFFLE [RS_246]
+                    Merge Join Operator [MERGEJOIN_370] (rows=191667562 width=135)
+                      Conds:RS_234._col2=RS_235._col0(Inner),Output:["_col3","_col4"]
+                    <-Reducer 35 [SIMPLE_EDGE]
+                      SHUFFLE [RS_234]
                        PartitionCols:_col2
-                        Merge Join Operator [MERGEJOIN_378] (rows=174243235 width=135)
-                          Conds:RS_243._col1=RS_244._col0(Inner),Output:["_col2","_col3","_col4"]
-                        <-Reducer 36 [SIMPLE_EDGE]
-                          SHUFFLE [RS_243]
+                        Merge Join Operator [MERGEJOIN_366] (rows=174243235 width=135)
+                          Conds:RS_231._col1=RS_232._col0(Inner),Output:["_col2","_col3","_col4"]
+                        <-Reducer 34 [SIMPLE_EDGE]
+                          SHUFFLE [RS_231]
                            PartitionCols:_col1
-                            Merge Join Operator [MERGEJOIN_369] (rows=158402938 width=135)
-                              Conds:RS_240._col0=RS_241._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
-                            <-Map 35 [SIMPLE_EDGE]
-                              SHUFFLE [RS_240]
+                            Merge Join Operator [MERGEJOIN_357] (rows=158402938 width=135)
+                              Conds:RS_228._col0=RS_229._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
+                            <-Map 33 [SIMPLE_EDGE]
+                              SHUFFLE [RS_228]
                                PartitionCols:_col0
-                                Select Operator [SEL_127] (rows=144002668 width=135)
+                                Select Operator [SEL_121] (rows=144002668 width=135)
                                  Output:["_col0","_col1","_col2","_col3","_col4"]
-                                  Filter Operator [FIL_346] (rows=144002668 width=135)
+                                  Filter Operator [FIL_334] (rows=144002668 width=135)
                                    predicate:(ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
-                                    TableScan [TS_125] (rows=144002668 width=135)
+                                    TableScan [TS_119] (rows=144002668 width=135)
                                      default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"]
-                            <-Map 39 [SIMPLE_EDGE]
-                              SHUFFLE [RS_241]
+                            <-Map 37 [SIMPLE_EDGE]
+                              SHUFFLE [RS_229]
                                PartitionCols:_col0
-                                Select Operator [SEL_130] (rows=18262 width=1119)
+                                Select Operator [SEL_124] (rows=18262 width=1119)
                                  Output:["_col0"]
-                                  Filter Operator [FIL_347] (rows=18262 width=1119)
+                                  Filter Operator [FIL_335] (rows=18262 width=1119)
                                    predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null)
-                                    TableScan [TS_128] (rows=73049 width=1119)
+                                    TableScan [TS_122] (rows=73049 width=1119)
                                      default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
-                        <-Reducer 43 [SIMPLE_EDGE]
-                          SHUFFLE [RS_244]
+                        <-Reducer 41 [SIMPLE_EDGE]
+                          SHUFFLE [RS_232]
                            PartitionCols:_col0
-                            Group By Operator [GBY_156] (rows=58079562 width=88)
+                            Group By Operator [GBY_150] (rows=58079562 width=88)
                              Output:["_col0"],keys:_col1
-                              Select Operator [SEL_152] (rows=116159124 width=88)
+                              Select Operator [SEL_146] (rows=116159124 width=88)
                                Output:["_col1"]
-                                Filter Operator [FIL_151] (rows=116159124 width=88)
+                                Filter Operator [FIL_145] (rows=116159124 width=88)
                                  predicate:(_col3 > 4)
-                                  Select Operator [SEL_360] (rows=348477374 width=88)
+                                  Select Operator [SEL_348] (rows=348477374 width=88)
                                    Output:["_col0","_col3"]
-                                    Group By Operator [GBY_150] (rows=348477374 width=88)
+                                    Group By Operator [GBY_144] (rows=348477374 width=88)
                                      Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
-                                    <-Reducer 42 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_149]
+                                    <-Reducer 40 [SIMPLE_EDGE]
+                                      SHUFFLE [RS_143]
                                        PartitionCols:_col0
-                                        Group By Operator [GBY_148] (rows=696954748 width=88)
+                                        Group By Operator [GBY_142] (rows=696954748 width=88)
                                          Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2
-                                          Select Operator [SEL_146] (rows=696954748 width=88)
+                                          Select Operator [SEL_140] (rows=696954748 width=88)
                                            Output:["_col0","_col1","_col2"]
-                                            Merge Join Operator [MERGEJOIN_371] (rows=696954748 width=88)
-                                              Conds:RS_143._col1=RS_144._col0(Inner),Output:["_col3","_col5","_col6"]
-                                            <-Map 45 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_144]
+                                            Merge Join Operator [MERGEJOIN_359] (rows=696954748 width=88)
+                                              Conds:RS_137._col1=RS_138._col0(Inner),Output:["_col3","_col5","_col6"]
+                                            <-Map 43 [SIMPLE_EDGE]
+                                              SHUFFLE [RS_138]
                                                PartitionCols:_col0
-                                                Select Operator [SEL_139] (rows=462000 width=1436)
+                                                Select Operator [SEL_133] (rows=462000 width=1436)
                                                  Output:["_col0","_col1"]
-                                                  Filter Operator [FIL_350] (rows=462000 width=1436)
+                                                  Filter Operator [FIL_338] (rows=462000 width=1436)
                                                    predicate:i_item_sk is not null
-                                                    TableScan [TS_137] (rows=462000 width=1436)
+                                                    TableScan [TS_131] (rows=462000 width=1436)
                                                      default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"]
-                                            <-Reducer 41 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_143]
+                                            <-Reducer 39 [SIMPLE_EDGE]
+                                              SHUFFLE [RS_137]
                                                PartitionCols:_col1
-                                                Merge Join Operator [MERGEJOIN_370] (rows=633595212 width=88)
-                                                  Conds:RS_140._col0=RS_141._col0(Inner),Output:["_col1","_col3"]
-                                                <-Map 40 [SIMPLE_EDGE]
-                                                  SHUFFLE [RS_140]
+                                                Merge Join Operator [MERGEJOIN_358] (rows=633595212 width=88)
+                                                  Conds:RS_134._col0=RS_135._col0(Inner),Output:["_col1","_col3"]
+                                                <-Map 38 [SIMPLE_EDGE]
+                                                  SHUFFLE [RS_134]
                                                    PartitionCols:_col0
-                                                    Select Operator [SEL_133] (rows=575995635 width=88)
+                                                    Select Operator [SEL_127] (rows=575995635 width=88)
                                                      Output:["_col0","_col1"]
-                                                      Filter Operator [FIL_348] (rows=575995635 width=88)
+                                                      Filter Operator [FIL_336] (rows=575995635 width=88)
                                                        predicate:(ss_sold_date_sk is not null and ss_item_sk is not null)
-                                                        TableScan [TS_131] (rows=575995635 width=88)
+                                                        TableScan [TS_125] (rows=575995635 width=88)
                                                          default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"]
-                                                <-Map 44 [SIMPLE_EDGE]
-                                                  SHUFFLE [RS_141]
+                                                <-Map 42 [SIMPLE_EDGE]
+                                                  SHUFFLE [RS_135]
                                                    PartitionCols:_col0
-                                                    Select Operator [SEL_136] (rows=36525 width=1119)
+                                                    Select Operator [SEL_130] (rows=36525 width=1119)
                                                      Output:["_col0","_col1"]
-                                                      Filter Operator [FIL_349] (rows=36525 width=1119)
+                                                      Filter Operator [FIL_337] (rows=36525 width=1119)
                                                        predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
-                                                        TableScan [TS_134] (rows=73049 width=1119)
+                                                        TableScan [TS_128] (rows=73049 width=1119)
                                                          default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"]
-                    <-Reducer 53 [SIMPLE_EDGE]
-                      SHUFFLE [RS_247]
+                    <-Reducer 49 [SIMPLE_EDGE]
+                      SHUFFLE [RS_235]
                        PartitionCols:_col0
-                        Group By Operator [GBY_238] (rows=52799601 width=322)
-                          Output:["_col0"],keys:KEY._col0
-                        <-Reducer 52 [SIMPLE_EDGE]
-                          SHUFFLE [RS_237]
-                            PartitionCols:_col0
-                            Group By Operator [GBY_236] (rows=105599202 width=322)
-                              Output:["_col0"],keys:_col2
-                              Select Operator [SEL_235] (rows=105599202 width=322)
-                                Output:["_col2"]
-                                Filter Operator [FIL_234] (rows=105599202 width=322)
-                                  predicate:(_col3 > (0.95 * _col1))
-                                  Merge Join Operator [MERGEJOIN_383] (rows=316797606 width=322)
-                                    Conds:(Inner),Output:["_col1","_col2","_col3"]
-                                  <-Reducer 51 [CUSTOM_SIMPLE_EDGE]
-                                    PARTITION_ONLY_SHUFFLE [RS_231]
-                                      Merge Join Operator [MERGEJOIN_382] (rows=1 width=233)
-                                        Conds:(Left Outer),Output:["_col1"]
-                                      <-Reducer 50 [CUSTOM_SIMPLE_EDGE]
-                                        PARTITION_ONLY_SHUFFLE [RS_228]
-                                          Select Operator [SEL_186] (rows=1 width=8)
-                                            Filter Operator [FIL_185] (rows=1 width=8)
-                                              predicate:(sq_count_check(_col0) <= 1)
-                                              Group By Operator [GBY_183] (rows=1 width=8)
-                                                Output:["_col0"],aggregations:["count(VALUE._col0)"]
-                                              <-Reducer 49 [CUSTOM_SIMPLE_EDGE]
-                                                PARTITION_ONLY_SHUFFLE [RS_182]
-                                                  Group By Operator [GBY_181] (rows=1 width=8)
-                                                    Output:["_col0"],aggregations:["count()"]
-                                                    Select Operator [SEL_178] (rows=348477374 width=88)
-                                                      Group By Operator [GBY_177] (rows=348477374 width=88)
-                                                        Output:["_col0"],keys:KEY._col0
-                                                      <-Reducer 48 [SIMPLE_EDGE]
-                                                        SHUFFLE [RS_176]
-                                                          PartitionCols:_col0
-                                                          Group By Operator [GBY_175] (rows=696954748 width=88)
-                                                            Output:["_col0"],keys:_col0
-                                                            Select Operator [SEL_173] (rows=696954748 width=88)
-                                                              Output:["_col0"]
-                                                              Merge Join Operator [MERGEJOIN_373] (rows=696954748 width=88)
-                                                                Conds:RS_170._col1=RS_171._col0(Inner),Output:["_col6"]
-                                                              <-Map 55 [SIMPLE_EDGE]
-                                                                SHUFFLE [RS_171]
-                                                                  PartitionCols:_col0
-                                                                  Select Operator [SEL_166] (rows=80000000 width=860)
-                                                                    Output:["_col0"]
-                                                                    Filter Operator [FIL_353] (rows=80000000 width=860)
-                                                                      predicate:c_customer_sk is not null
-                                                                      TableScan [TS_164] (rows=80000000 width=860)
-                                                                        default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
-                                                              <-Reducer 47 [SIMPLE_EDGE]
-                                                                SHUFFLE [RS_170]
-                                                                  PartitionCols:_col1
-                                                                  Merge Join Operator [MERGEJOIN_372] (rows=633595212 width=88)
-                                                                    Conds:RS_167._col0=RS_168._col0(Inner),Output:["_col1"]
-                                                                  <-Map 46 [SIMPLE_EDGE]
-                                                                    SHUFFLE [RS_167]
-                                                                      PartitionCols:_col0
-                                                                      Select Operator [SEL_160] (rows=575995635 width=88)
-                                                                        Output:["_col0","_col1"]
-                                                                        Filter Operator [FIL_351] (rows=575995635 width=88)
-                                                                          predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
-                                                                          TableScan [TS_158] (rows=575995635 width=88)
-                                                                            default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
-                                                                  <-Map 54 [SIMPLE_EDGE]
-                                                                    SHUFFLE [RS_168]
-                                                                      PartitionCols:_col0
-                                                                      Select Operator [SEL_163] (rows=36525 width=1119)
-                                                                        Output:["_col0"]
-                                                                        Filter Operator [FIL_352] (rows=36525 width=1119)
-                                                                          predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
-                                                                          TableScan [TS_161] (rows=73049
width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 60 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_229] - Group By Operator [GBY_211] (rows=1 width=224) - Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 59 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_210] - Group By Operator [GBY_209] (rows=1 width=224) - Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_207] (rows=348477374 width=88) - Output:["_col1"] - Group By Operator [GBY_206] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 58 [SIMPLE_EDGE] - SHUFFLE [RS_205] - PartitionCols:_col0 - Group By Operator [GBY_204] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_202] (rows=696954748 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_375] (rows=696954748 width=88) - Conds:RS_199._col1=RS_200._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 62 [SIMPLE_EDGE] - SHUFFLE [RS_200] - PartitionCols:_col0 - Select Operator [SEL_195] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_356] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_193] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 57 [SIMPLE_EDGE] - SHUFFLE [RS_199] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_374] (rows=633595212 width=88) - Conds:RS_196._col0=RS_197._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 56 [SIMPLE_EDGE] - SHUFFLE [RS_196] - PartitionCols:_col0 - Select Operator [SEL_189] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_354] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_187] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 61 [SIMPLE_EDGE] - SHUFFLE [RS_197] - PartitionCols:_col0 - Select Operator [SEL_192] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_355] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_190] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 65 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_232] - Group By Operator [GBY_226] (rows=316797606 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 64 [SIMPLE_EDGE] - SHUFFLE [RS_225] - PartitionCols:_col0 - Group By Operator [GBY_224] (rows=633595212 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_222] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_376] (rows=633595212 width=88) - Conds:RS_219._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 63 [SIMPLE_EDGE] - SHUFFLE [RS_219] - PartitionCols:_col0 - Select Operator [SEL_215] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_357] (rows=575995635 width=88) - predicate:ss_customer_sk is not null - TableScan [TS_213] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 66 [SIMPLE_EDGE] - SHUFFLE [RS_220] - PartitionCols:_col0 - 
Select Operator [SEL_218] (rows=80000000 width=860) + Select Operator [SEL_227] (rows=105599202 width=321) + Output:["_col0"] + Filter Operator [FIL_226] (rows=105599202 width=321) + predicate:(_col3 > (0.95 * _col1)) + Merge Join Operator [MERGEJOIN_369] (rows=316797606 width=321) + Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 48 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_222] + Select Operator [SEL_180] (rows=1 width=8) + Filter Operator [FIL_179] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_177] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 47 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_176] + Group By Operator [GBY_175] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_172] (rows=348477374 width=88) + Group By Operator [GBY_171] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_170] + PartitionCols:_col0 + Group By Operator [GBY_169] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_167] (rows=696954748 width=88) Output:["_col0"] - Filter Operator [FIL_358] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_216] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + Merge Join Operator [MERGEJOIN_361] (rows=696954748 width=88) + Conds:RS_164._col1=RS_165._col0(Inner),Output:["_col6"] + <-Map 51 [SIMPLE_EDGE] + SHUFFLE [RS_165] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_341] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_158] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 45 [SIMPLE_EDGE] + SHUFFLE [RS_164] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_360] (rows=633595212 width=88) + Conds:RS_161._col0=RS_162._col0(Inner),Output:["_col1"] + <-Map 44 [SIMPLE_EDGE] + SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_154] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_339] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_152] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 50 [SIMPLE_EDGE] + SHUFFLE [RS_162] + PartitionCols:_col0 + Select Operator [SEL_157] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_340] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_155] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 56 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_223] + Group By Operator [GBY_205] (rows=1 width=224) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 55 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_204] + Group By Operator [GBY_203] (rows=1 width=224) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_201] (rows=348477374 width=88) + Output:["_col1"] + Group By Operator [GBY_200] (rows=348477374 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 54 [SIMPLE_EDGE] + SHUFFLE [RS_199] + PartitionCols:_col0 + Group By Operator [GBY_198] (rows=696954748 width=88) + 
Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_196] (rows=696954748 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_363] (rows=696954748 width=88) + Conds:RS_193._col1=RS_194._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 58 [SIMPLE_EDGE] + SHUFFLE [RS_194] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_344] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_187] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_193] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_362] (rows=633595212 width=88) + Conds:RS_190._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 52 [SIMPLE_EDGE] + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_183] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_342] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_181] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 57 [SIMPLE_EDGE] + SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_186] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_343] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_184] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 61 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_224] + Group By Operator [GBY_220] (rows=316797606 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 60 [SIMPLE_EDGE] + SHUFFLE [RS_219] + PartitionCols:_col0 + Group By Operator [GBY_218] (rows=633595212 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_216] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_364] (rows=633595212 width=88) + Conds:RS_213._col0=RS_214._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 59 [SIMPLE_EDGE] + SHUFFLE [RS_213] + PartitionCols:_col0 + Select Operator [SEL_209] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_345] (rows=575995635 width=88) + predicate:ss_customer_sk is not null + TableScan [TS_207] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 62 [SIMPLE_EDGE] + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_212] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_346] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_210] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_254] - Group By Operator [GBY_253] (rows=1 width=112) + Reduce Output Operator [RS_242] + Group By Operator [GBY_241] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_124] (rows=383314495 width=135) + Select Operator [SEL_118] (rows=383314495 width=135) Output:["_col0"] - Merge Join Operator [MERGEJOIN_381] (rows=383314495 width=135) - 
Conds:RS_121._col1=RS_122._col0(Inner),Output:["_col3","_col4"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_122] + Merge Join Operator [MERGEJOIN_368] (rows=383314495 width=135) + Conds:RS_115._col1=RS_116._col0(Inner),Output:["_col3","_col4"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_116] PartitionCols:_col0 - Group By Operator [GBY_113] (rows=52799601 width=322) - Output:["_col0"],keys:KEY._col0 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_112] - PartitionCols:_col0 - Group By Operator [GBY_111] (rows=105599202 width=322) - Output:["_col0"],keys:_col2 - Select Operator [SEL_110] (rows=105599202 width=322) - Output:["_col2"] - Filter Operator [FIL_109] (rows=105599202 width=322) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_380] (rows=316797606 width=322) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_106] - Merge Join Operator [MERGEJOIN_379] (rows=1 width=233) - Conds:(Left Outer),Output:["_col1"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_103] - Select Operator [SEL_61] (rows=1 width=8) - Filter Operator [FIL_60] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_58] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_57] - Group By Operator [GBY_56] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_53] (rows=348477374 width=88) - Group By Operator [GBY_52] (rows=348477374 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col0 - Group By Operator [GBY_50] (rows=696954748 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_48] (rows=696954748 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_365] (rows=696954748 width=88) - Conds:RS_45._col1=RS_46._col0(Inner),Output:["_col6"] - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col0 - Select Operator [SEL_41] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_340] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_39] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_364] (rows=633595212 width=88) - Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_35] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_338] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_33] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Map 22 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Select Operator [SEL_38] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_339] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_36] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_104] - Group By Operator [GBY_86] (rows=1 width=224) - Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_85] - Group By Operator 
[GBY_84] (rows=1 width=224) - Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_82] (rows=348477374 width=88) - Output:["_col1"] - Group By Operator [GBY_81] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_80] - PartitionCols:_col0 - Group By Operator [GBY_79] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_77] (rows=696954748 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_367] (rows=696954748 width=88) - Conds:RS_74._col1=RS_75._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 30 [SIMPLE_EDGE] - SHUFFLE [RS_75] - PartitionCols:_col0 - Select Operator [SEL_70] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_343] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_68] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_74] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_366] (rows=633595212 width=88) - Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 24 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col0 - Select Operator [SEL_64] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_341] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_62] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 29 [SIMPLE_EDGE] - SHUFFLE [RS_72] - PartitionCols:_col0 - Select Operator [SEL_67] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_342] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_65] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_107] - Group By Operator [GBY_101] (rows=316797606 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_100] - PartitionCols:_col0 - Group By Operator [GBY_99] (rows=633595212 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_97] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_368] (rows=633595212 width=88) - Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col0 - Select Operator [SEL_90] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_344] (rows=575995635 width=88) - predicate:ss_customer_sk is not null - TableScan [TS_88] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 34 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col0 - Select Operator [SEL_93] (rows=80000000 width=860) + Select Operator [SEL_108] (rows=105599202 width=321) + Output:["_col0"] + Filter Operator [FIL_107] (rows=105599202 width=321) + predicate:(_col3 > (0.95 * _col1)) + Merge Join Operator [MERGEJOIN_367] (rows=316797606 width=321) + Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] + 
PARTITION_ONLY_SHUFFLE [RS_103] + Select Operator [SEL_61] (rows=1 width=8) + Filter Operator [FIL_60] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_58] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_57] + Group By Operator [GBY_56] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_53] (rows=348477374 width=88) + Group By Operator [GBY_52] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col0 + Group By Operator [GBY_50] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_48] (rows=696954748 width=88) Output:["_col0"] - Filter Operator [FIL_345] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_91] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + Merge Join Operator [MERGEJOIN_353] (rows=696954748 width=88) + Conds:RS_45._col1=RS_46._col0(Inner),Output:["_col6"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Select Operator [SEL_41] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_328] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_39] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_352] (rows=633595212 width=88) + Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_35] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_326] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_33] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Select Operator [SEL_38] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_327] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_36] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 26 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_104] + Group By Operator [GBY_86] (rows=1 width=224) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 25 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_85] + Group By Operator [GBY_84] (rows=1 width=224) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_82] (rows=348477374 width=88) + Output:["_col1"] + Group By Operator [GBY_81] (rows=348477374 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col0 + Group By Operator [GBY_79] (rows=696954748 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_77] (rows=696954748 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_355] (rows=696954748 width=88) + Conds:RS_74._col1=RS_75._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 28 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=80000000 width=860) + Output:["_col0"] + Filter 
Operator [FIL_331] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_68] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_354] (rows=633595212 width=88) + Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 22 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_329] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_62] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col0 + Select Operator [SEL_67] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_330] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_65] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_105] + Group By Operator [GBY_101] (rows=316797606 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_100] + PartitionCols:_col0 + Group By Operator [GBY_99] (rows=633595212 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_97] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_356] (rows=633595212 width=88) + Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col0 + Select Operator [SEL_90] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_332] (rows=575995635 width=88) + predicate:ss_customer_sk is not null + TableScan [TS_88] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 32 [SIMPLE_EDGE] + SHUFFLE [RS_95] + PartitionCols:_col0 + Select Operator [SEL_93] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_333] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_91] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_121] + SHUFFLE [RS_115] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_377] (rows=348467716 width=135) - Conds:RS_118._col2=RS_119._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_365] (rows=348467716 width=135) + Conds:RS_112._col2=RS_113._col0(Inner),Output:["_col1","_col3","_col4"] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_119] + SHUFFLE [RS_113] PartitionCols:_col0 Group By Operator [GBY_31] (rows=58079562 width=88) Output:["_col0"],keys:_col1 @@ -586,7 +558,7 @@ Stage-0 Output:["_col1"] Filter Operator [FIL_26] (rows=116159124 width=88) predicate:(_col3 > 4) - Select Operator [SEL_359] (rows=348477374 width=88) + Select Operator [SEL_347] (rows=348477374 width=88) Output:["_col0","_col3"] Group By Operator [GBY_25] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, 
KEY._col1, KEY._col2 @@ -597,28 +569,28 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 Select Operator [SEL_21] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_363] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_351] (rows=696954748 width=88) Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col5","_col6"] <-Map 13 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_14] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_337] (rows=462000 width=1436) + Filter Operator [FIL_325] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_362] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_350] (rows=633595212 width=88) Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col3"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 Select Operator [SEL_11] (rows=36525 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_336] (rows=36525 width=1119) + Filter Operator [FIL_324] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"] @@ -627,30 +599,30 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_8] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_335] (rows=575995635 width=88) + Filter Operator [FIL_323] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_118] + SHUFFLE [RS_112] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_361] (rows=316788826 width=135) - Conds:RS_115._col0=RS_116._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_349] (rows=316788826 width=135) + Conds:RS_109._col0=RS_110._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_115] + SHUFFLE [RS_109] PartitionCols:_col0 Select Operator [SEL_2] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_333] (rows=287989836 width=135) + Filter Operator [FIL_321] (rows=287989836 width=135) predicate:(cs_item_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_116] + SHUFFLE [RS_110] PartitionCols:_col0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_334] (rows=18262 width=1119) + Filter Operator [FIL_322] (rows=18262 width=1119) predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] diff --git a/ql/src/test/results/clientpositive/router_join_ppr.q.out b/ql/src/test/results/clientpositive/router_join_ppr.q.out index cc2b07e..f17959b 100644 --- 
a/ql/src/test/results/clientpositive/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/router_join_ppr.q.out @@ -30,41 +30,41 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false TableScan alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col1 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -211,149 +211,42 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - 
partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart Truncated Path -> Alias: /src [$hdt$_0:a] /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(VALUE._col1 = '2008-04-08')} + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
GlobalTableId: 0 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -373,8 +266,6 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: FROM src a @@ -388,8 +279,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 17 val_17 17 val_17 17 val_17 17 val_17 @@ -435,18 +324,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -456,18 +345,18 @@ STAGE PLANS: GatherStats: false Filter 
Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -624,42 +513,38 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -737,18 +622,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -758,18 +643,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -926,38 +811,34 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 
Data size: 139 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out index d2a879d..84b9250 100644 --- a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out @@ -202,7 +202,7 @@ STAGE PLANS: alias: vectortab_b_1korc Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) + predicate: (s is not null and dt is not null and ts is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) @@ -226,8 +226,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean - predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean + predicate: (s is not null and dt is not null and ts is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)