diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml
index ca5028d..9fe829d 100644
--- a/druid-handler/pom.xml
+++ b/druid-handler/pom.xml
@@ -212,6 +212,17 @@
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-druid</artifactId>
<version>${calcite.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.calcite.avatica</groupId>
+ <artifactId>avatica-core</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.calcite.avatica</groupId>
+ <artifactId>avatica</artifactId>
+ <version>${avatica.version}</version>
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
index 0b35428..be374af 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
@@ -170,7 +170,11 @@ private static String createSelectStarQuery(String dataSource) throws IOExceptio
// Create Select query
SelectQueryBuilder builder = new Druids.SelectQueryBuilder();
builder.dataSource(dataSource);
- builder.intervals(Arrays.asList(DruidTable.DEFAULT_INTERVAL));
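+ // Build the select query's scan interval in UTC from the default interval bounds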
+ final List<Interval> intervals = Arrays.asList(
+ new Interval(DruidTable.DEFAULT_INTERVAL.getStartMillis(),
+ DruidTable.DEFAULT_INTERVAL.getEndMillis(),
+ ISOChronology.getInstanceUTC()));
+ builder.intervals(intervals);
builder.pagingSpec(PagingSpec.newSpec(1));
Map<String, Object> context = new HashMap<>();
context.put(Constants.DRUID_QUERY_FETCH, false);
@@ -413,11 +417,15 @@ private static String createSelectStarQuery(String dataSource) throws IOExceptio
private static List<List<Interval>> createSplitsIntervals(List<Interval> intervals, int numSplits
) {
- final long totalTime = DruidDateTimeUtils.extractTotalTime(intervals);
+
long startTime = intervals.get(0).getStartMillis();
long endTime = startTime;
long currTime = 0;
List<List<Interval>> newIntervals = new ArrayList<>();
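+ // Total time covered by the intervals, used below to size each split's time range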
+ long totalTime = 0;
+ for (Interval interval: intervals) {
+ totalTime += interval.getEndMillis() - interval.getStartMillis();
+ }
for (int i = 0, posIntervals = 0; i < numSplits; i++) {
final long rangeSize = Math.round((double) (totalTime * (i + 1)) / numSplits) -
Math.round((double) (totalTime * i) / numSplits);
diff --git a/pom.xml b/pom.xml
index 3ddec7a..3431dfd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,10 +112,10 @@
3.5.2
1.5.6
0.1
- <avatica.version>1.8.0</avatica.version>
+ <avatica.version>1.10.0-SNAPSHOT</avatica.version>
1.7.7
0.8.0.RELEASE
- <calcite.version>1.10.0</calcite.version>
+ <calcite.version>1.12.0-SNAPSHOT</calcite.version>
4.2.1
4.1.6
4.1.7
diff --git a/ql/pom.xml b/ql/pom.xml
index 7db0ede..c541538 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -379,12 +379,22 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.apache.calcite.avatica</groupId>
+ <artifactId>avatica-core</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-druid</artifactId>
<version>${calcite.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.calcite.avatica</groupId>
+ <artifactId>avatica-core</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.calcite.avatica</groupId>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 1d78b4c..b121eea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -34,6 +34,7 @@
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.adapter.druid.DruidSchema;
import org.apache.calcite.adapter.druid.DruidTable;
+import org.apache.calcite.adapter.druid.LocalInterval;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptMaterialization;
@@ -310,7 +311,7 @@ private static RelNode createTableScan(Table viewTable) {
}
metrics.add(field.getName());
}
- List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
+ List<LocalInterval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
index 9a65de3..d0b1757 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite;
+import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.plan.Context;
import org.apache.calcite.rel.RelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
@@ -27,11 +28,14 @@
public class HivePlannerContext implements Context {
private HiveAlgorithmsConf algoConfig;
private HiveRulesRegistry registry;
+ private CalciteConnectionConfig calciteConfig;
private Set corrScalarRexSQWithAgg;
- public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry, Set corrScalarRexSQWithAgg) {
+ public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry,
+ CalciteConnectionConfig calciteConfig, Set corrScalarRexSQWithAgg) {
this.algoConfig = algoConfig;
this.registry = registry;
+ this.calciteConfig = calciteConfig;
// this is to keep track if a subquery is correlated and contains aggregate
// this is computed in CalcitePlanner while planning and is later required by subuery remove rule
// hence this is passed using HivePlannerContext
@@ -45,6 +49,9 @@ public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry regis
if (clazz.isInstance(registry)) {
return clazz.cast(registry);
}
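+ // Let callers unwrap the Calcite connection config from the planner context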
+ if (clazz.isInstance(calciteConfig)) {
+ return clazz.cast(calciteConfig);
+ }
if(clazz.isInstance(corrScalarRexSQWithAgg)) {
return clazz.cast(corrScalarRexSQWithAgg);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
index 4edc4df..0b94b8a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
@@ -24,6 +24,7 @@
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlTypeTransforms;
import com.google.common.collect.Sets;
@@ -42,9 +43,10 @@
Sets.newHashSet(YEAR, QUARTER, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND);
private HiveExtractDate(String name) {
- super(name, SqlKind.EXTRACT, ReturnTypes.INTEGER_NULLABLE, null,
- OperandTypes.INTERVALINTERVAL_INTERVALDATETIME,
- SqlFunctionCategory.SYSTEM);
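+ // Force a nullable INTEGER return type instead of deriving nullability from the operands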
+ super(name, SqlKind.EXTRACT,
+ ReturnTypes.cascade(ReturnTypes.INTEGER, SqlTypeTransforms.FORCE_NULLABLE), null,
+ OperandTypes.INTERVALINTERVAL_INTERVALDATETIME,
+ SqlFunctionCategory.SYSTEM);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
index 38d7906..81de33f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java
@@ -21,6 +21,7 @@
import java.util.List;
import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptMaterializations;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
@@ -77,7 +78,7 @@ protected void apply(RelOptRuleCall call, Project project, Filter filter, TableS
// Costing is done in transformTo(), so we call it repeatedly with all applicable
// materialized views and cheapest one will be picked
List<RelOptMaterialization> applicableMaterializations =
- VolcanoPlanner.getApplicableMaterializations(root, materializations);
+ RelOptMaterializations.getApplicableMaterializations(root, materializations);
for (RelOptMaterialization materialization : applicableMaterializations) {
List<RelNode> subs = new MaterializedViewSubstitutionVisitor(
materialization.queryRel, root, relBuilderFactory).go(materialization.tableRel);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
index 69e157e..9bcdd0c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
@@ -165,7 +165,7 @@ public RelOptPredicateList getPredicates(Project project, RelMetadataQuery mq) {
rexBuilder.makeInputRef(project, expr.i), expr.e));
}
}
- return RelOptPredicateList.of(projectPullUpPredicates);
+ return RelOptPredicateList.of(rexBuilder, projectPullUpPredicates);
}
/** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */
@@ -202,6 +202,7 @@ public RelOptPredicateList getPredicates(Aggregate agg, RelMetadataQuery mq) {
final RelNode input = agg.getInput();
final RelOptPredicateList inputInfo = mq.getPulledUpPredicates(input);
final List<RexNode> aggPullUpPredicates = new ArrayList<>();
+ final RexBuilder rexBuilder = agg.getCluster().getRexBuilder();
ImmutableBitSet groupKeys = agg.getGroupSet();
Mapping m = Mappings.create(MappingType.PARTIAL_FUNCTION,
@@ -219,7 +220,7 @@ public RelOptPredicateList getPredicates(Aggregate agg, RelMetadataQuery mq) {
aggPullUpPredicates.add(r);
}
}
- return RelOptPredicateList.of(aggPullUpPredicates);
+ return RelOptPredicateList.of(rexBuilder, aggPullUpPredicates);
}
/**
@@ -271,7 +272,7 @@ public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) {
if (!disjPred.isAlwaysTrue()) {
preds.add(disjPred);
}
- return RelOptPredicateList.of(preds);
+ return RelOptPredicateList.of(rB, preds);
}
/**
@@ -411,6 +412,7 @@ public RelOptPredicateList inferPredicates(
final JoinRelType joinType = joinRel.getJoinType();
final List<RexNode> leftPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(leftChildPredicates));
final List<RexNode> rightPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(rightChildPredicates));
+ final RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
switch (joinType) {
case INNER:
case LEFT:
@@ -476,13 +478,13 @@ public RelOptPredicateList inferPredicates(
pulledUpPredicates = Iterables.concat(leftPreds, rightPreds,
RelOptUtil.conjunctions(joinRel.getCondition()), inferredPredicates);
}
- return RelOptPredicateList.of(
+ return RelOptPredicateList.of(rexBuilder,
pulledUpPredicates, leftInferredPredicates, rightInferredPredicates);
case LEFT:
- return RelOptPredicateList.of(
+ return RelOptPredicateList.of(rexBuilder,
leftPreds, EMPTY_LIST, rightInferredPredicates);
case RIGHT:
- return RelOptPredicateList.of(
+ return RelOptPredicateList.of(rexBuilder,
rightPreds, leftInferredPredicates, EMPTY_LIST);
default:
assert inferredPredicates.size() == 0;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index 0dc0c24..79b6522 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -37,6 +37,8 @@
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
public class ASTBuilder {
@@ -269,19 +271,21 @@ public static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral)
type = ((Boolean) val).booleanValue() ? HiveParser.KW_TRUE : HiveParser.KW_FALSE;
break;
case DATE: {
- val = literal.getValue();
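+ // Preserve the literal's wall-clock value: read the calendar in its own time zone, then retain the same fields in the local zone for formatting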
+ final Calendar c = (Calendar) literal.getValue();
+ final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
type = HiveParser.TOK_DATELITERAL;
DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
- val = df.format(((Calendar) val).getTime());
+ val = df.format(dt.withZoneRetainFields(DateTimeZone.getDefault()).toDate());
val = "'" + val + "'";
}
break;
case TIME:
case TIMESTAMP: {
- val = literal.getValue();
+ final Calendar c = (Calendar) literal.getValue();
+ final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
type = HiveParser.TOK_TIMESTAMPLITERAL;
DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
- val = df.format(((Calendar) val).getTime());
+ val = df.format(dt.withZoneRetainFields(DateTimeZone.getDefault()).toDate());
val = "'" + val + "'";
}
break;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 27990a2..165f8c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -24,7 +24,6 @@
import java.util.Map;
import org.apache.calcite.adapter.druid.DruidQuery;
-import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelVisitor;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
index e840938..b1efbbd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
import java.math.BigDecimal;
-import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
@@ -75,6 +74,8 @@
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -291,16 +292,17 @@ public ExprNodeDesc visitLiteral(RexLiteral literal) {
case DOUBLE:
return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo,
Double.valueOf(((Number) literal.getValue3()).doubleValue()));
- case DATE:
+ case DATE: {
+ final Calendar c = (Calendar) literal.getValue();
return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo,
- new Date(((Calendar)literal.getValue()).getTimeInMillis()));
+ new java.sql.Date(c.getTimeInMillis()));
+ }
case TIME:
case TIMESTAMP: {
- Object value = literal.getValue3();
- if (value instanceof Long) {
- value = new Timestamp((Long)value);
- }
- return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value);
+ final Calendar c = (Calendar) literal.getValue();
+ final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone()));
+ return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo,
+ new Timestamp(dt.getMillis()));
}
case BINARY:
return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index a05b89c..26c3bc8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -23,11 +23,12 @@
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
-import java.util.GregorianCalendar;
import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
+import org.apache.calcite.avatica.util.DateTimeUtils;
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.plan.RelOptCluster;
@@ -38,8 +39,8 @@
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlCollation;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlKind;
@@ -96,6 +97,8 @@
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
@@ -634,20 +637,22 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx
calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
break;
case DATE:
- Calendar cal = new GregorianCalendar();
- cal.setTime((Date) value);
- calciteLiteral = rexBuilder.makeDateLiteral(cal);
- break;
- case TIMESTAMP:
- Calendar c = null;
- if (value instanceof Calendar) {
- c = (Calendar)value;
- } else {
- c = Calendar.getInstance();
- c.setTimeInMillis(((Timestamp)value).getTime());
- }
- calciteLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED);
- break;
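+ // Build date/timestamp literals on GMT-based calendars; for timestamps, shift by the source time zone offset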
+ final Calendar cal = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
+ cal.setTime((Date) value);
+ calciteLiteral = rexBuilder.makeDateLiteral(cal);
+ break;
+ case TIMESTAMP:
+ final Calendar calt = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault());
+ if (value instanceof Calendar) {
+ final Calendar c = (Calendar) value;
+ long timeMs = c.getTimeInMillis();
+ calt.setTimeInMillis(timeMs - c.getTimeZone().getOffset(timeMs));
+ } else {
+ final Timestamp ts = (Timestamp) value;
+ calt.setTimeInMillis(ts.getTime() - (ts.getTimezoneOffset() * 60 * 1000));
+ }
+ calciteLiteral = rexBuilder.makeTimestampLiteral(calt, RelDataType.PRECISION_NOT_SPECIFIED);
+ break;
case INTERVAL_YEAR_MONTH:
// Calcite year-month literal value is months as BigDecimal
BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 21bf020..ed3d526 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -38,6 +38,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -50,10 +51,12 @@
import org.apache.calcite.adapter.druid.DruidRules;
import org.apache.calcite.adapter.druid.DruidSchema;
import org.apache.calcite.adapter.druid.DruidTable;
+import org.apache.calcite.adapter.druid.LocalInterval;
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.config.CalciteConnectionConfigImpl;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptMaterialization;
import org.apache.calcite.plan.RelOptPlanner;
-import org.apache.calcite.plan.RelOptPlanner.Executor;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptSchema;
import org.apache.calcite.plan.RelOptUtil;
@@ -93,6 +96,7 @@
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rel.type.RelDataTypeImpl;
import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexExecutor;
import org.apache.calcite.rex.RexFieldCollation;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
@@ -1298,7 +1302,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
HiveRulesRegistry registry = new HiveRulesRegistry();
- HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, corrScalarRexSQWithAgg);
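+ // Create a default Calcite connection config; it is handed to rules via the planner context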
+ CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(new Properties());
+ HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig,
+ corrScalarRexSQWithAgg);
RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
final RexBuilder rexBuilder = cluster.getRexBuilder();
final RelOptCluster optCluster = RelOptCluster.create(planner, rexBuilder);
@@ -1323,10 +1329,16 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
}
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation");
+ // Create executor
+ RexExecutor executorProvider = new HiveRexExecutorImpl(optCluster);
+ calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider);
+
// We need to get the ColumnAccessInfo and viewToTableSchema for views.
HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
HiveRelFactories.HIVE_BUILDER.create(optCluster, null), this.columnAccessInfo,
this.viewProjectToTableSchema);
+
fieldTrimmer.trim(calciteGenPlan);
// Create and set MD provider
@@ -1334,9 +1346,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
RelMetadataQuery.THREAD_PROVIDERS.set(
JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
- // Create executor
- Executor executorProvider = new HiveRexExecutorImpl(optCluster);
-
//Remove subquery
LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
@@ -1558,7 +1567,7 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
* executor
* @return
*/
- private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, Executor executorProvider) {
+ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
// TODO: Decorelation of subquery should be done before attempting
// Partition Pruning; otherwise Expression evaluation may try to execute
// corelated sub query.
@@ -1576,13 +1585,13 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
"Calcite: HiveProjectOverIntersectRemoveRule and HiveIntersectMerge rules");
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+ basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
HiveIntersectRewriteRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: HiveIntersectRewrite rule");
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+ basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
HiveExceptRewriteRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: HiveExceptRewrite rule");
@@ -1595,7 +1604,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
// Its not clear, if this rewrite is always performant on MR, since extra map phase
// introduced for 2nd MR job may offset gains of this multi-stage aggregation.
// We need a cost model for MR to enable this on MR.
- basePlan = hepPlan(basePlan, true, mdProvider, null, HiveExpandDistinctAggregatesRule.INSTANCE);
+ basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, Distinct aggregate rewrite");
}
@@ -1606,7 +1615,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
// Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or
// ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.ARBITRARY,
+ basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.ARBITRARY,
new HivePreFilteringRule(maxCNFNodeCount));
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF");
@@ -1661,10 +1670,10 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE);
final long reductionTuples = HiveConf.getLongVar(conf,
HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES);
- basePlan = hepPlan(basePlan, true, mdProvider, null, HiveSortMergeRule.INSTANCE,
+ basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveSortMergeRule.INSTANCE,
HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE,
HiveSortUnionReduceRule.INSTANCE);
- basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+ basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
new HiveSortRemoveRule(reductionProportion, reductionTuples),
HiveProjectSortTransposeRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
@@ -1673,14 +1682,14 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
// 5. Push Down Semi Joins
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, null, SemiJoinJoinTransposeRule.INSTANCE,
+ basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE,
SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, Push Down Semi Joins");
// 6. Apply Partition Pruning
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, null, new HivePartitionPruneRule(conf));
+ basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, new HivePartitionPruneRule(conf));
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, Partition Pruning");
@@ -1695,7 +1704,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
// 8. Merge, remove and reduce Project if possible
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, null,
+ basePlan = hepPlan(basePlan, false, mdProvider, executorProvider,
HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, Merge Project-Project");
@@ -1705,7 +1714,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
// storage (incase there are filters on non partition cols). This only
// matches FIL-PROJ-TS
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, null,
+ basePlan = hepPlan(basePlan, true, mdProvider, executorProvider,
HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
HiveProjectFilterPullUpConstantsRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
@@ -1725,7 +1734,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
* @return optimized RelNode
*/
private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
- RelMetadataProvider mdProvider, Executor executorProvider, RelOptRule... rules) {
+ RelMetadataProvider mdProvider, RexExecutor executorProvider, RelOptRule... rules) {
return hepPlan(basePlan, followPlanChanges, mdProvider, executorProvider,
HepMatchOrder.TOP_DOWN, rules);
}
@@ -1742,7 +1751,7 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
* @return optimized RelNode
*/
private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
- RelMetadataProvider mdProvider, Executor executorProvider, HepMatchOrder order,
+ RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order,
RelOptRule... rules) {
RelNode optimizedRelNode = basePlan;
@@ -1769,6 +1778,7 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
if (executorProvider != null) {
basePlan.getCluster().getPlanner().setExecutor(executorProvider);
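+ // Set the executor on the HepPlanner too, so rules that fold or evaluate expressions can use it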
+ planner.setExecutor(executorProvider);
}
planner.setRoot(basePlan);
@@ -2248,7 +2258,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
}
metrics.add(field.getName());
}
- List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
+ List<LocalInterval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
@@ -4143,5 +4153,4 @@ private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
DRUID,
NATIVE
}
-
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
index 7229cc7..4823950 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
@@ -61,7 +61,7 @@ public void testRuleFiredOnlyOnce() {
// Create rules registry to not trigger a rule more than once
HiveRulesRegistry registry = new HiveRulesRegistry();
- HivePlannerContext context = new HivePlannerContext(null, registry, null);
+ HivePlannerContext context = new HivePlannerContext(null, registry, null, null);
HepPlanner planner = new HepPlanner(programBuilder.build(), context);
// Cluster
diff --git a/ql/src/test/results/clientpositive/constprog2.q.out b/ql/src/test/results/clientpositive/constprog2.q.out
index 7bfd0cf..cbc5fd2 100644
--- a/ql/src/test/results/clientpositive/constprog2.q.out
+++ b/ql/src/test/results/clientpositive/constprog2.q.out
@@ -67,6 +67,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: EXPLAIN
SELECT src1.key, src1.key + 1, src2.value
FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND cast(src1.key as double) = 86
@@ -90,28 +91,22 @@ STAGE PLANS:
predicate: (UDFToDouble(key) = 86.0) (type: boolean)
Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ sort order:
Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
TableScan
alias: src2
Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (UDFToDouble(key) = 86.0) (type: boolean)
+ predicate: (86 = key) (type: boolean)
Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col1
Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ sort order:
Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
@@ -119,17 +114,17 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE
+ 0
+ 1
+ outputColumnNames: _col2
+ Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), (_col0 + 1) (type: int), _col2 (type: string)
+ expressions: 86 (type: int), 87 (type: int), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/druid_basic2.q.out b/ql/src/test/results/clientpositive/druid_basic2.q.out
index bc9410b..d54d0a5 100644
--- a/ql/src/test/results/clientpositive/druid_basic2.q.out
+++ b/ql/src/test/results/clientpositive/druid_basic2.q.out
@@ -77,7 +77,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
@@ -104,7 +104,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":["delta"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":[],"metrics":["delta"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
@@ -135,7 +135,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
@@ -166,7 +166,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
druid.query.type groupBy
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
@@ -212,7 +212,7 @@ STAGE PLANS:
alias: druid_table_1
filterExpr: language is not null (type: boolean)
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
@@ -237,7 +237,7 @@ STAGE PLANS:
alias: druid_table_1
filterExpr: language is not null (type: boolean)
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
@@ -274,7 +274,7 @@ STAGE PLANS:
columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
druid.datasource wikipedia
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
#### A masked pattern was here ####
name default.druid_table_1
@@ -300,7 +300,7 @@ STAGE PLANS:
columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
druid.datasource wikipedia
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
#### A masked pattern was here ####
name default.druid_table_1
@@ -399,7 +399,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
GatherStats: false
@@ -414,7 +414,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
@@ -442,7 +442,7 @@ STAGE PLANS:
columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
druid.datasource wikipedia
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
#### A masked pattern was here ####
name default.druid_table_1
@@ -468,7 +468,7 @@ STAGE PLANS:
columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
druid.datasource wikipedia
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
#### A masked pattern was here ####
name default.druid_table_1
@@ -545,8 +545,7 @@ LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -554,27 +553,25 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: druid_table_1
+ properties:
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
Select Operator
- expressions: robot (type: string), language (type: string), __time (type: timestamp), added (type: float), delta (type: float)
- outputColumnNames: robot, language, __time, added, delta
+ expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: max(added), sum(delta)
- keys: robot (type: string), language (type: string), floor_day(__time) (type: timestamp)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Reduce Output Operator
+ key expressions: UDFToInteger(_col0) (type: int), _col2 (type: bigint)
+ null sort order: az
+ sort order: +-
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp)
- null sort order: aaa
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- tag: -1
- value expressions: _col3 (type: float), _col4 (type: double)
- auto parallelism: false
+ tag: -1
+ TopN: 10
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: timestamp), _col3 (type: float)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -592,6 +589,8 @@ STAGE PLANS:
columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
druid.datasource wikipedia
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
#### A masked pattern was here ####
name default.druid_table_1
numFiles 0
@@ -616,6 +615,8 @@ STAGE PLANS:
columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
druid.datasource wikipedia
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"day","dimensions":["robot","language"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
#### A masked pattern was here ####
name default.druid_table_1
numFiles 0
@@ -634,81 +635,8 @@ STAGE PLANS:
/druid_table_1 [druid_table_1]
Needs Tagging: false
Reduce Operator Tree:
- Group By Operator
- aggregations: max(VALUE._col0), sum(VALUE._col1)
- keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: timestamp)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: float), _col4 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- column.name.delimiter ,
- columns _col0,_col1,_col2,_col3
- columns.types string,timestamp,float,double
- escape.delim \
- serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- Reduce Output Operator
- key expressions: UDFToInteger(_col0) (type: int), _col2 (type: float)
- null sort order: az
- sort order: +-
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- tag: -1
- TopN: 10
- TopN Hash Memory Usage: 0.1
- value expressions: _col0 (type: string), _col1 (type: timestamp), _col3 (type: double)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10004
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- column.name.delimiter ,
- columns _col0,_col1,_col2,_col3
- columns.types string,timestamp,float,double
- escape.delim \
- serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- column.name.delimiter ,
- columns _col0,_col1,_col2,_col3
- columns.types string,timestamp,float,double
- escape.delim \
- serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Truncated Path -> Alias:
-#### A masked pattern was here ####
- Needs Tagging: false
- Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: double)
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: bigint), VALUE._col2 (type: float)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Limit
@@ -726,7 +654,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1,_col2,_col3
- columns.types string:timestamp:float:double
+ columns.types string:timestamp:bigint:float
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
diff --git a/ql/src/test/results/clientpositive/druid_timeseries.q.out b/ql/src/test/results/clientpositive/druid_timeseries.q.out
index 6b2ffe9..372927b 100644
--- a/ql/src/test/results/clientpositive/druid_timeseries.q.out
+++ b/ql/src/test/results/clientpositive/druid_timeseries.q.out
@@ -79,7 +79,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"longMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"longMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -109,7 +109,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"NONE","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"none","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -139,7 +139,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"YEAR","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"year","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -169,7 +169,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"QUARTER","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"quarter","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -199,7 +199,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"MONTH","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"month","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -229,7 +229,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"WEEK","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"week","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -259,7 +259,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"DAY","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"day","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -289,7 +289,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"HOUR","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"hour","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -319,7 +319,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"MINUTE","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"minute","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -349,7 +349,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"SECOND","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"second","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -381,7 +381,7 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"HOUR","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"hour","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
druid.query.type timeseries
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
@@ -418,7 +418,7 @@ STAGE PLANS:
alias: druid_table_1
filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean)
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
@@ -495,7 +495,7 @@ STAGE PLANS:
alias: druid_table_1
filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean)
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
diff --git a/ql/src/test/results/clientpositive/druid_topn.q.out b/ql/src/test/results/clientpositive/druid_topn.q.out
index 57d6586..9ef03b4 100644
--- a/ql/src/test/results/clientpositive/druid_topn.q.out
+++ b/ql/src/test/results/clientpositive/druid_topn.q.out
@@ -85,8 +85,8 @@ STAGE PLANS:
TableScan
alias: druid_table_1
properties:
- druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"all","dimension":"robot","metric":"$f1","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100}
- druid.query.type topN
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":["robot"],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f1","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: robot (type: string), $f1 (type: bigint), $f2 (type: float)
@@ -109,24 +109,54 @@ ORDER BY s DESC
LIMIT 100
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1
+ properties:
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"none","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: __time (type: timestamp), robot (type: string), $f2 (type: bigint), $f3 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: float)
+ sort order: -
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: timestamp), VALUE._col1 (type: string), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: timestamp), _col2 (type: bigint), _col3 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: druid_table_1
- properties:
- druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"NONE","dimension":"robot","metric":"$f3","aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100}
- druid.query.type topN
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: EXPLAIN
SELECT robot, floor_year(`__time`), max(added), sum(variation) as s
@@ -143,24 +173,50 @@ ORDER BY s DESC
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1
+ properties:
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"year","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: float)
+ sort order: -
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: timestamp), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 10
Processor Tree:
- TableScan
- alias: druid_table_1
- properties:
- druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"YEAR","dimension":"robot","metric":"$f3","aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":10}
- druid.query.type topN
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: EXPLAIN
SELECT robot, floor_month(`__time`), max(added), sum(variation) as s
@@ -177,24 +233,50 @@ ORDER BY s
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1
+ properties:
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: float)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: timestamp), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 10
Processor Tree:
- TableScan
- alias: druid_table_1
- properties:
- druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending"}]},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
- druid.query.type groupBy
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: EXPLAIN
SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s
@@ -211,24 +293,54 @@ ORDER BY s DESC, m DESC
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1
+ properties:
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot","namespace"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
+ outputColumnNames: _col0, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: float), _col3 (type: bigint)
+ sort order: --
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col2 (type: timestamp)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col2 (type: timestamp), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float)
+ outputColumnNames: _col0, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: bigint), _col4 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: druid_table_1
- properties:
- druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot","namespace"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending"},{"dimension":"$f3","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
- druid.query.type groupBy
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: EXPLAIN
SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s
@@ -245,24 +357,54 @@ ORDER BY robot ASC, m DESC
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1
+ properties:
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"month","dimensions":["robot","namespace"],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"]}
+ druid.query.type groupBy
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
+ outputColumnNames: _col0, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col3 (type: bigint)
+ sort order: +-
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: timestamp), _col4 (type: float)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: bigint), VALUE._col2 (type: float)
+ outputColumnNames: _col0, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: bigint), _col4 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: druid_table_1
- properties:
- druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot","namespace"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending"},{"dimension":"$f3","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
- druid.query.type groupBy
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: EXPLAIN
SELECT robot, floor_year(`__time`), max(added), sum(variation) as s
@@ -281,24 +423,54 @@ ORDER BY s
LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1
+ properties:
+ druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"year","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
+ druid.query.type timeseries
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: __time (type: timestamp), $f1_0 (type: bigint), $f2 (type: float)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: float)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: timestamp), _col1 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: timestamp), VALUE._col1 (type: bigint), KEY.reducesinkkey0 (type: float)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: '1' (type: string), _col0 (type: timestamp), _col1 (type: bigint), _col2 (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: druid_table_1
- properties:
- druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"YEAR","dimensions":[],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f2","direction":"ascending"}]},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
- druid.query.type groupBy
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: '1' (type: string), __time (type: timestamp), $f1_0 (type: bigint), $f2 (type: float)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: EXPLAIN
SELECT robot, floor_hour(`__time`), max(added) as m, sum(variation)
@@ -333,7 +505,7 @@ STAGE PLANS:
alias: druid_table_1
filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean)
properties:
- druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
+ druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
druid.query.type select
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index 8c6f0f1..01d60f7 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -321,7 +321,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col4
Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((_col2 + _col4) > 2.0) or ((_col1 + 1) > 2)) (type: boolean)
+ predicate: (((_col2 + _col4) > 2) or ((_col1 + 1) > 2)) (type: boolean)
Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string)
diff --git a/ql/src/test/results/clientpositive/join_merging.q.out b/ql/src/test/results/clientpositive/join_merging.q.out
index 79a7023..ef9ca18 100644
--- a/ql/src/test/results/clientpositive/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/join_merging.q.out
@@ -109,7 +109,7 @@ STAGE PLANS:
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (p_size > 10) (type: boolean)
+ predicate: ((p_size > 10) and p_partkey is not null) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_size (type: int)
@@ -124,20 +124,23 @@ STAGE PLANS:
TableScan
alias: p2
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_partkey (type: int), p_size (type: int)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Select Operator
+ expressions: p_partkey (type: int), p_size (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
Reduce Operator Tree:
Join Operator
condition map:
- Left Outer Join0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
index a867bd2..841ef14 100644
--- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
@@ -1044,7 +1044,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key + 1) is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (key + 1) (type: int)
@@ -1063,7 +1063,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key + 1) is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (key + 1) (type: int)
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
index c08a534..b69d0bd 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
@@ -1261,7 +1261,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key + 1) is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (key + 1) (type: int)
@@ -1293,7 +1293,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key + 1) is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (key + 1) (type: int)
diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
index 98d2328..4bdb186 100644
--- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
@@ -274,7 +274,7 @@ Stage-0
Select Operator [SEL_5] (rows=2 width=3)
Output:["_col0","_col1"]
Filter Operator [FIL_16] (rows=2 width=3)
- predicate:((id = 100) and (id = 100) is not null)
+ predicate:(id = 100)
TableScan [TS_3] (rows=5 width=3)
default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -311,7 +311,7 @@ Stage-0
Select Operator [SEL_2] (rows=5 width=20)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_15] (rows=5 width=20)
- predicate:((dimid = 100) and (dimid = 100) is not null)
+ predicate:(dimid = 100)
TableScan [TS_0] (rows=10 width=20)
default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
<-Map 3 [SIMPLE_EDGE] llap
@@ -322,7 +322,7 @@ Stage-0
Select Operator [SEL_5] (rows=2 width=3)
Output:["_col0","_col1"]
Filter Operator [FIL_16] (rows=2 width=3)
- predicate:((id = 100) and (id = 100) is not null)
+ predicate:(id = 100)
TableScan [TS_3] (rows=5 width=3)
default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -372,7 +372,7 @@ Stage-0
Select Operator [SEL_5] (rows=2 width=3)
Output:["_col0","_col1"]
Filter Operator [FIL_16] (rows=2 width=3)
- predicate:((id = 100) and (id = 100) is not null)
+ predicate:(id = 100)
TableScan [TS_3] (rows=5 width=3)
default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -409,7 +409,7 @@ Stage-0
Select Operator [SEL_2] (rows=5 width=20)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_15] (rows=5 width=20)
- predicate:((dimid = 100) and (dimid = 100) is not null)
+ predicate:(dimid = 100)
TableScan [TS_0] (rows=10 width=20)
default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
<-Map 3 [SIMPLE_EDGE] llap
@@ -420,7 +420,7 @@ Stage-0
Select Operator [SEL_5] (rows=2 width=3)
Output:["_col0","_col1"]
Filter Operator [FIL_16] (rows=2 width=3)
- predicate:((id = 100) and (id = 100) is not null)
+ predicate:(id = 100)
TableScan [TS_3] (rows=5 width=3)
default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
@@ -459,7 +459,7 @@ Stage-0
Select Operator [SEL_2] (rows=5 width=20)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_15] (rows=5 width=20)
- predicate:((dimid = 100) and (dimid = 100) is not null)
+ predicate:(dimid = 100)
TableScan [TS_0] (rows=10 width=20)
default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
<-Map 3 [SIMPLE_EDGE] llap
@@ -470,7 +470,7 @@ Stage-0
Select Operator [SEL_5] (rows=2 width=3)
Output:["_col0","_col1"]
Filter Operator [FIL_16] (rows=2 width=3)
- predicate:((id = 100) and (id = 100) is not null)
+ predicate:(id = 100)
TableScan [TS_3] (rows=5 width=3)
default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
index 53c728b..54a864a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
@@ -208,8 +208,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean
- predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean
+ predicate: (s is not null and dt is not null and ts is not null) (type: boolean)
Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
@@ -274,8 +274,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean
- predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean
+ predicate: (s is not null and dt is not null and ts is not null) (type: boolean)
Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
diff --git a/ql/src/test/results/clientpositive/mergejoins.q.out b/ql/src/test/results/clientpositive/mergejoins.q.out
index 1023f61..4a290a1 100644
--- a/ql/src/test/results/clientpositive/mergejoins.q.out
+++ b/ql/src/test/results/clientpositive/mergejoins.q.out
@@ -251,7 +251,7 @@ STAGE PLANS:
Left Outer Join1 to 2
filter predicates:
0
- 1 {(UDFToDouble(KEY.reducesinkkey0) < UDFToDouble(10))}
+ 1 {(UDFToDouble(KEY.reducesinkkey0) < 10.0)}
2
keys:
0 _col0 (type: string)
diff --git a/ql/src/test/results/clientpositive/perf/query15.q.out b/ql/src/test/results/clientpositive/perf/query15.q.out
index 4f4dcc5..a331cd7 100644
--- a/ql/src/test/results/clientpositive/perf/query15.q.out
+++ b/ql/src/test/results/clientpositive/perf/query15.q.out
@@ -5,83 +5,77 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
- Reducer 6
- File Output Operator [FS_31]
- Limit [LIM_30] (rows=100 width=135)
+ Reducer 5
+ File Output Operator [FS_30]
+ Limit [LIM_29] (rows=100 width=135)
Number of rows:100
- Select Operator [SEL_29] (rows=174233858 width=135)
+ Select Operator [SEL_28] (rows=174233858 width=135)
Output:["_col0","_col1"]
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_28]
- Group By Operator [GBY_26] (rows=174233858 width=135)
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ Group By Operator [GBY_25] (rows=174233858 width=135)
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_25]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
PartitionCols:_col0
- Group By Operator [GBY_24] (rows=348467716 width=135)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7
- Select Operator [SEL_23] (rows=348467716 width=135)
- Output:["_col7","_col2"]
- Merge Join Operator [MERGEJOIN_47] (rows=348467716 width=135)
- Conds:RS_20._col0=RS_21._col0(Inner),Output:["_col2","_col7"]
- <-Map 9 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col0
- Select Operator [SEL_19] (rows=18262 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_44] (rows=18262 width=1119)
- predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null)
- TableScan [TS_17] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_20]
- PartitionCols:_col0
- Select Operator [SEL_16] (rows=316788826 width=135)
- Output:["_col0","_col2","_col7"]
- Filter Operator [FIL_15] (rows=316788826 width=135)
- predicate:((substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or (_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500))
- Merge Join Operator [MERGEJOIN_46] (rows=316788826 width=135)
- Conds:RS_12._col0=RS_13._col1(Inner),Output:["_col3","_col4","_col5","_col7"]
- <-Map 8 [SIMPLE_EDGE]
- SHUFFLE [RS_13]
- PartitionCols:_col1
- Select Operator [SEL_8] (rows=287989836 width=135)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_43] (rows=287989836 width=135)
- predicate:(cs_bill_customer_sk is not null and cs_sold_date_sk is not null)
- TableScan [TS_6] (rows=287989836 width=135)
- default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_12]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_45] (rows=88000001 width=860)
- Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col3","_col4"]
- <-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_9]
- PartitionCols:_col1
- Select Operator [SEL_2] (rows=80000000 width=860)
- Output:["_col0","_col1"]
- Filter Operator [FIL_41] (rows=80000000 width=860)
- predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
- TableScan [TS_0] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
- <-Map 7 [SIMPLE_EDGE]
- SHUFFLE [RS_10]
- PartitionCols:_col0
- Select Operator [SEL_5] (rows=40000000 width=1014)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_42] (rows=40000000 width=1014)
- predicate:ca_address_sk is not null
- TableScan [TS_3] (rows=40000000 width=1014)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_zip"]
+ Group By Operator [GBY_23] (rows=348467716 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col4
+ Merge Join Operator [MERGEJOIN_45] (rows=348467716 width=135)
+ Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col4","_col7"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_43] (rows=88000001 width=860)
+ Conds:RS_16._col1=RS_17._col0(Inner),Output:["_col0","_col4"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_16]
+ PartitionCols:_col1
+ Select Operator [SEL_2] (rows=80000000 width=860)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_39] (rows=80000000 width=860)
+ predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
+ TableScan [TS_0] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
+ <-Map 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_17]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=40000000 width=1014)
+ Output:["_col0","_col2"]
+ Filter Operator [FIL_40] (rows=40000000 width=1014)
+ predicate:(((substr(ca_zip, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or (ca_state) IN ('CA', 'WA', 'GA')) and ca_address_sk is not null)
+ TableScan [TS_3] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_zip"]
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_20]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_44] (rows=316788826 width=135)
+ Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2"]
+ <-Map 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_12]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_41] (rows=287989836 width=135)
+ predicate:(cs_bill_customer_sk is not null and cs_sold_date_sk is not null)
+ TableScan [TS_6] (rows=287989836 width=135)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"]
+ <-Map 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col0
+ Select Operator [SEL_11] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_42] (rows=18262 width=1119)
+ predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null)
+ TableScan [TS_9] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/query23.q.out
index 85cee23..a04e5cd 100644
--- a/ql/src/test/results/clientpositive/perf/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/query23.q.out
@@ -1,7 +1,5 @@
-Warning: Shuffle Join MERGEJOIN[379][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product
-Warning: Shuffle Join MERGEJOIN[380][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product
-Warning: Shuffle Join MERGEJOIN[382][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 51' is a cross product
-Warning: Shuffle Join MERGEJOIN[383][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 52' is a cross product
+Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 19' is a cross product
+Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 49' is a cross product
PREHOOK: query: explain with frequent_ss_items as
(select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
from store_sales
@@ -107,42 +105,38 @@ Plan optimized by CBO.
Vertex dependency in root stage
Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
-Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE)
-Reducer 16 <- Map 23 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE)
-Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 28 (CUSTOM_SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 33 (CUSTOM_SIMPLE_EDGE)
-Reducer 21 <- Reducer 20 (SIMPLE_EDGE)
-Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE)
-Reducer 26 <- Map 30 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE)
-Reducer 27 <- Reducer 26 (SIMPLE_EDGE)
-Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE)
+Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE)
+Reducer 24 <- Map 28 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
+Reducer 25 <- Reducer 24 (SIMPLE_EDGE)
+Reducer 26 <- Reducer 25 (CUSTOM_SIMPLE_EDGE)
Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE)
-Reducer 33 <- Reducer 32 (SIMPLE_EDGE)
-Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE)
-Reducer 37 <- Reducer 36 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE)
-Reducer 38 <- Reducer 37 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Reducer 4 <- Reducer 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE)
-Reducer 42 <- Map 45 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE)
-Reducer 43 <- Reducer 42 (SIMPLE_EDGE)
-Reducer 47 <- Map 46 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE)
-Reducer 48 <- Map 55 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE)
-Reducer 49 <- Reducer 48 (SIMPLE_EDGE)
-Reducer 50 <- Reducer 49 (CUSTOM_SIMPLE_EDGE)
-Reducer 51 <- Reducer 50 (CUSTOM_SIMPLE_EDGE), Reducer 60 (CUSTOM_SIMPLE_EDGE)
-Reducer 52 <- Reducer 51 (CUSTOM_SIMPLE_EDGE), Reducer 65 (CUSTOM_SIMPLE_EDGE)
-Reducer 53 <- Reducer 52 (SIMPLE_EDGE)
-Reducer 57 <- Map 56 (SIMPLE_EDGE), Map 61 (SIMPLE_EDGE)
-Reducer 58 <- Map 62 (SIMPLE_EDGE), Reducer 57 (SIMPLE_EDGE)
-Reducer 59 <- Reducer 58 (SIMPLE_EDGE)
+Reducer 30 <- Map 29 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE)
+Reducer 31 <- Reducer 30 (SIMPLE_EDGE)
+Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE)
+Reducer 35 <- Reducer 34 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE)
+Reducer 36 <- Reducer 35 (SIMPLE_EDGE), Reducer 49 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 40 <- Map 43 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE)
+Reducer 41 <- Reducer 40 (SIMPLE_EDGE)
+Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE)
+Reducer 46 <- Map 51 (SIMPLE_EDGE), Reducer 45 (SIMPLE_EDGE)
+Reducer 47 <- Reducer 46 (SIMPLE_EDGE)
+Reducer 48 <- Reducer 47 (CUSTOM_SIMPLE_EDGE)
+Reducer 49 <- Reducer 48 (CUSTOM_SIMPLE_EDGE), Reducer 56 (CUSTOM_SIMPLE_EDGE), Reducer 61 (CUSTOM_SIMPLE_EDGE)
+Reducer 53 <- Map 52 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE)
+Reducer 54 <- Map 58 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE)
+Reducer 55 <- Reducer 54 (SIMPLE_EDGE)
+Reducer 56 <- Reducer 55 (CUSTOM_SIMPLE_EDGE)
Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE)
-Reducer 60 <- Reducer 59 (CUSTOM_SIMPLE_EDGE)
-Reducer 64 <- Map 63 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE)
-Reducer 65 <- Reducer 64 (SIMPLE_EDGE)
+Reducer 60 <- Map 59 (SIMPLE_EDGE), Map 62 (SIMPLE_EDGE)
+Reducer 61 <- Reducer 60 (SIMPLE_EDGE)
Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Stage-0
@@ -150,435 +144,413 @@ Stage-0
limit:100
Stage-1
Reducer 6
- File Output Operator [FS_258]
- Limit [LIM_257] (rows=1 width=112)
+ File Output Operator [FS_246]
+ Limit [LIM_245] (rows=1 width=112)
Number of rows:100
- Group By Operator [GBY_255] (rows=1 width=112)
+ Group By Operator [GBY_243] (rows=1 width=112)
Output:["_col0"],aggregations:["sum(VALUE._col0)"]
<-Union 5 [CUSTOM_SIMPLE_EDGE]
- <-Reducer 38 [CONTAINS]
- Reduce Output Operator [RS_254]
- Group By Operator [GBY_253] (rows=1 width=112)
+ <-Reducer 36 [CONTAINS]
+ Reduce Output Operator [RS_242]
+ Group By Operator [GBY_241] (rows=1 width=112)
Output:["_col0"],aggregations:["sum(_col0)"]
- Select Operator [SEL_249] (rows=191667562 width=135)
+ Select Operator [SEL_237] (rows=191667562 width=135)
Output:["_col0"]
- Merge Join Operator [MERGEJOIN_384] (rows=191667562 width=135)
- Conds:RS_246._col2=RS_247._col0(Inner),Output:["_col3","_col4"]
- <-Reducer 37 [SIMPLE_EDGE]
- SHUFFLE [RS_246]
+ Merge Join Operator [MERGEJOIN_370] (rows=191667562 width=135)
+ Conds:RS_234._col2=RS_235._col0(Inner),Output:["_col3","_col4"]
+ <-Reducer 35 [SIMPLE_EDGE]
+ SHUFFLE [RS_234]
PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_378] (rows=174243235 width=135)
- Conds:RS_243._col1=RS_244._col0(Inner),Output:["_col2","_col3","_col4"]
- <-Reducer 36 [SIMPLE_EDGE]
- SHUFFLE [RS_243]
+ Merge Join Operator [MERGEJOIN_366] (rows=174243235 width=135)
+ Conds:RS_231._col1=RS_232._col0(Inner),Output:["_col2","_col3","_col4"]
+ <-Reducer 34 [SIMPLE_EDGE]
+ SHUFFLE [RS_231]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_369] (rows=158402938 width=135)
- Conds:RS_240._col0=RS_241._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
- <-Map 35 [SIMPLE_EDGE]
- SHUFFLE [RS_240]
+ Merge Join Operator [MERGEJOIN_357] (rows=158402938 width=135)
+ Conds:RS_228._col0=RS_229._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
+ <-Map 33 [SIMPLE_EDGE]
+ SHUFFLE [RS_228]
PartitionCols:_col0
- Select Operator [SEL_127] (rows=144002668 width=135)
+ Select Operator [SEL_121] (rows=144002668 width=135)
Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_346] (rows=144002668 width=135)
+ Filter Operator [FIL_334] (rows=144002668 width=135)
predicate:(ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
- TableScan [TS_125] (rows=144002668 width=135)
+ TableScan [TS_119] (rows=144002668 width=135)
default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"]
- <-Map 39 [SIMPLE_EDGE]
- SHUFFLE [RS_241]
+ <-Map 37 [SIMPLE_EDGE]
+ SHUFFLE [RS_229]
PartitionCols:_col0
- Select Operator [SEL_130] (rows=18262 width=1119)
+ Select Operator [SEL_124] (rows=18262 width=1119)
Output:["_col0"]
- Filter Operator [FIL_347] (rows=18262 width=1119)
+ Filter Operator [FIL_335] (rows=18262 width=1119)
predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null)
- TableScan [TS_128] (rows=73049 width=1119)
+ TableScan [TS_122] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
- <-Reducer 43 [SIMPLE_EDGE]
- SHUFFLE [RS_244]
+ <-Reducer 41 [SIMPLE_EDGE]
+ SHUFFLE [RS_232]
PartitionCols:_col0
- Group By Operator [GBY_156] (rows=58079562 width=88)
+ Group By Operator [GBY_150] (rows=58079562 width=88)
Output:["_col0"],keys:_col1
- Select Operator [SEL_152] (rows=116159124 width=88)
+ Select Operator [SEL_146] (rows=116159124 width=88)
Output:["_col1"]
- Filter Operator [FIL_151] (rows=116159124 width=88)
+ Filter Operator [FIL_145] (rows=116159124 width=88)
predicate:(_col3 > 4)
- Select Operator [SEL_360] (rows=348477374 width=88)
+ Select Operator [SEL_348] (rows=348477374 width=88)
Output:["_col0","_col3"]
- Group By Operator [GBY_150] (rows=348477374 width=88)
+ Group By Operator [GBY_144] (rows=348477374 width=88)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
- <-Reducer 42 [SIMPLE_EDGE]
- SHUFFLE [RS_149]
+ <-Reducer 40 [SIMPLE_EDGE]
+ SHUFFLE [RS_143]
PartitionCols:_col0
- Group By Operator [GBY_148] (rows=696954748 width=88)
+ Group By Operator [GBY_142] (rows=696954748 width=88)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2
- Select Operator [SEL_146] (rows=696954748 width=88)
+ Select Operator [SEL_140] (rows=696954748 width=88)
Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_371] (rows=696954748 width=88)
- Conds:RS_143._col1=RS_144._col0(Inner),Output:["_col3","_col5","_col6"]
- <-Map 45 [SIMPLE_EDGE]
- SHUFFLE [RS_144]
+ Merge Join Operator [MERGEJOIN_359] (rows=696954748 width=88)
+ Conds:RS_137._col1=RS_138._col0(Inner),Output:["_col3","_col5","_col6"]
+ <-Map 43 [SIMPLE_EDGE]
+ SHUFFLE [RS_138]
PartitionCols:_col0
- Select Operator [SEL_139] (rows=462000 width=1436)
+ Select Operator [SEL_133] (rows=462000 width=1436)
Output:["_col0","_col1"]
- Filter Operator [FIL_350] (rows=462000 width=1436)
+ Filter Operator [FIL_338] (rows=462000 width=1436)
predicate:i_item_sk is not null
- TableScan [TS_137] (rows=462000 width=1436)
+ TableScan [TS_131] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"]
- <-Reducer 41 [SIMPLE_EDGE]
- SHUFFLE [RS_143]
+ <-Reducer 39 [SIMPLE_EDGE]
+ SHUFFLE [RS_137]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_370] (rows=633595212 width=88)
- Conds:RS_140._col0=RS_141._col0(Inner),Output:["_col1","_col3"]
- <-Map 40 [SIMPLE_EDGE]
- SHUFFLE [RS_140]
+ Merge Join Operator [MERGEJOIN_358] (rows=633595212 width=88)
+ Conds:RS_134._col0=RS_135._col0(Inner),Output:["_col1","_col3"]
+ <-Map 38 [SIMPLE_EDGE]
+ SHUFFLE [RS_134]
PartitionCols:_col0
- Select Operator [SEL_133] (rows=575995635 width=88)
+ Select Operator [SEL_127] (rows=575995635 width=88)
Output:["_col0","_col1"]
- Filter Operator [FIL_348] (rows=575995635 width=88)
+ Filter Operator [FIL_336] (rows=575995635 width=88)
predicate:(ss_sold_date_sk is not null and ss_item_sk is not null)
- TableScan [TS_131] (rows=575995635 width=88)
+ TableScan [TS_125] (rows=575995635 width=88)
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"]
- <-Map 44 [SIMPLE_EDGE]
- SHUFFLE [RS_141]
+ <-Map 42 [SIMPLE_EDGE]
+ SHUFFLE [RS_135]
PartitionCols:_col0
- Select Operator [SEL_136] (rows=36525 width=1119)
+ Select Operator [SEL_130] (rows=36525 width=1119)
Output:["_col0","_col1"]
- Filter Operator [FIL_349] (rows=36525 width=1119)
+ Filter Operator [FIL_337] (rows=36525 width=1119)
predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
- TableScan [TS_134] (rows=73049 width=1119)
+ TableScan [TS_128] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"]
- <-Reducer 53 [SIMPLE_EDGE]
- SHUFFLE [RS_247]
+ <-Reducer 49 [SIMPLE_EDGE]
+ SHUFFLE [RS_235]
PartitionCols:_col0
- Group By Operator [GBY_238] (rows=52799601 width=322)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 52 [SIMPLE_EDGE]
- SHUFFLE [RS_237]
- PartitionCols:_col0
- Group By Operator [GBY_236] (rows=105599202 width=322)
- Output:["_col0"],keys:_col2
- Select Operator [SEL_235] (rows=105599202 width=322)
- Output:["_col2"]
- Filter Operator [FIL_234] (rows=105599202 width=322)
- predicate:(_col3 > (0.95 * _col1))
- Merge Join Operator [MERGEJOIN_383] (rows=316797606 width=322)
- Conds:(Inner),Output:["_col1","_col2","_col3"]
- <-Reducer 51 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_231]
- Merge Join Operator [MERGEJOIN_382] (rows=1 width=233)
- Conds:(Left Outer),Output:["_col1"]
- <-Reducer 50 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_228]
- Select Operator [SEL_186] (rows=1 width=8)
- Filter Operator [FIL_185] (rows=1 width=8)
- predicate:(sq_count_check(_col0) <= 1)
- Group By Operator [GBY_183] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Reducer 49 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_182]
- Group By Operator [GBY_181] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_178] (rows=348477374 width=88)
- Group By Operator [GBY_177] (rows=348477374 width=88)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 48 [SIMPLE_EDGE]
- SHUFFLE [RS_176]
- PartitionCols:_col0
- Group By Operator [GBY_175] (rows=696954748 width=88)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_173] (rows=696954748 width=88)
- Output:["_col0"]
- Merge Join Operator [MERGEJOIN_373] (rows=696954748 width=88)
- Conds:RS_170._col1=RS_171._col0(Inner),Output:["_col6"]
- <-Map 55 [SIMPLE_EDGE]
- SHUFFLE [RS_171]
- PartitionCols:_col0
- Select Operator [SEL_166] (rows=80000000 width=860)
- Output:["_col0"]
- Filter Operator [FIL_353] (rows=80000000 width=860)
- predicate:c_customer_sk is not null
- TableScan [TS_164] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
- <-Reducer 47 [SIMPLE_EDGE]
- SHUFFLE [RS_170]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_372] (rows=633595212 width=88)
- Conds:RS_167._col0=RS_168._col0(Inner),Output:["_col1"]
- <-Map 46 [SIMPLE_EDGE]
- SHUFFLE [RS_167]
- PartitionCols:_col0
- Select Operator [SEL_160] (rows=575995635 width=88)
- Output:["_col0","_col1"]
- Filter Operator [FIL_351] (rows=575995635 width=88)
- predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
- TableScan [TS_158] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
- <-Map 54 [SIMPLE_EDGE]
- SHUFFLE [RS_168]
- PartitionCols:_col0
- Select Operator [SEL_163] (rows=36525 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_352] (rows=36525 width=1119)
- predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
- TableScan [TS_161] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
- <-Reducer 60 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_229]
- Group By Operator [GBY_211] (rows=1 width=224)
- Output:["_col0"],aggregations:["max(VALUE._col0)"]
- <-Reducer 59 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_210]
- Group By Operator [GBY_209] (rows=1 width=224)
- Output:["_col0"],aggregations:["max(_col1)"]
- Select Operator [SEL_207] (rows=348477374 width=88)
- Output:["_col1"]
- Group By Operator [GBY_206] (rows=348477374 width=88)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 58 [SIMPLE_EDGE]
- SHUFFLE [RS_205]
- PartitionCols:_col0
- Group By Operator [GBY_204] (rows=696954748 width=88)
- Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
- Select Operator [SEL_202] (rows=696954748 width=88)
- Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_375] (rows=696954748 width=88)
- Conds:RS_199._col1=RS_200._col0(Inner),Output:["_col2","_col3","_col6"]
- <-Map 62 [SIMPLE_EDGE]
- SHUFFLE [RS_200]
- PartitionCols:_col0
- Select Operator [SEL_195] (rows=80000000 width=860)
- Output:["_col0"]
- Filter Operator [FIL_356] (rows=80000000 width=860)
- predicate:c_customer_sk is not null
- TableScan [TS_193] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
- <-Reducer 57 [SIMPLE_EDGE]
- SHUFFLE [RS_199]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_374] (rows=633595212 width=88)
- Conds:RS_196._col0=RS_197._col0(Inner),Output:["_col1","_col2","_col3"]
- <-Map 56 [SIMPLE_EDGE]
- SHUFFLE [RS_196]
- PartitionCols:_col0
- Select Operator [SEL_189] (rows=575995635 width=88)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_354] (rows=575995635 width=88)
- predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
- TableScan [TS_187] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"]
- <-Map 61 [SIMPLE_EDGE]
- SHUFFLE [RS_197]
- PartitionCols:_col0
- Select Operator [SEL_192] (rows=36525 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_355] (rows=36525 width=1119)
- predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
- TableScan [TS_190] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
- <-Reducer 65 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_232]
- Group By Operator [GBY_226] (rows=316797606 width=88)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 64 [SIMPLE_EDGE]
- SHUFFLE [RS_225]
- PartitionCols:_col0
- Group By Operator [GBY_224] (rows=633595212 width=88)
- Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
- Select Operator [SEL_222] (rows=633595212 width=88)
- Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_376] (rows=633595212 width=88)
- Conds:RS_219._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3"]
- <-Map 63 [SIMPLE_EDGE]
- SHUFFLE [RS_219]
- PartitionCols:_col0
- Select Operator [SEL_215] (rows=575995635 width=88)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_357] (rows=575995635 width=88)
- predicate:ss_customer_sk is not null
- TableScan [TS_213] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"]
- <-Map 66 [SIMPLE_EDGE]
- SHUFFLE [RS_220]
- PartitionCols:_col0
- Select Operator [SEL_218] (rows=80000000 width=860)
+ Select Operator [SEL_227] (rows=105599202 width=321)
+ Output:["_col0"]
+ Filter Operator [FIL_226] (rows=105599202 width=321)
+ predicate:(_col3 > (0.95 * _col1))
+ Merge Join Operator [MERGEJOIN_369] (rows=316797606 width=321)
+ Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"]
+ <-Reducer 48 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_222]
+ Select Operator [SEL_180] (rows=1 width=8)
+ Filter Operator [FIL_179] (rows=1 width=8)
+ predicate:(sq_count_check(_col0) <= 1)
+ Group By Operator [GBY_177] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 47 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_176]
+ Group By Operator [GBY_175] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Select Operator [SEL_172] (rows=348477374 width=88)
+ Group By Operator [GBY_171] (rows=348477374 width=88)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 46 [SIMPLE_EDGE]
+ SHUFFLE [RS_170]
+ PartitionCols:_col0
+ Group By Operator [GBY_169] (rows=696954748 width=88)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_167] (rows=696954748 width=88)
Output:["_col0"]
- Filter Operator [FIL_358] (rows=80000000 width=860)
- predicate:c_customer_sk is not null
- TableScan [TS_216] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
+ Merge Join Operator [MERGEJOIN_361] (rows=696954748 width=88)
+ Conds:RS_164._col1=RS_165._col0(Inner),Output:["_col6"]
+ <-Map 51 [SIMPLE_EDGE]
+ SHUFFLE [RS_165]
+ PartitionCols:_col0
+ Select Operator [SEL_160] (rows=80000000 width=860)
+ Output:["_col0"]
+ Filter Operator [FIL_341] (rows=80000000 width=860)
+ predicate:c_customer_sk is not null
+ TableScan [TS_158] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
+ <-Reducer 45 [SIMPLE_EDGE]
+ SHUFFLE [RS_164]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_360] (rows=633595212 width=88)
+ Conds:RS_161._col0=RS_162._col0(Inner),Output:["_col1"]
+ <-Map 44 [SIMPLE_EDGE]
+ SHUFFLE [RS_161]
+ PartitionCols:_col0
+ Select Operator [SEL_154] (rows=575995635 width=88)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_339] (rows=575995635 width=88)
+ predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
+ TableScan [TS_152] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
+ <-Map 50 [SIMPLE_EDGE]
+ SHUFFLE [RS_162]
+ PartitionCols:_col0
+ Select Operator [SEL_157] (rows=36525 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_340] (rows=36525 width=1119)
+ predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
+ TableScan [TS_155] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+ <-Reducer 56 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_223]
+ Group By Operator [GBY_205] (rows=1 width=224)
+ Output:["_col0"],aggregations:["max(VALUE._col0)"]
+ <-Reducer 55 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_204]
+ Group By Operator [GBY_203] (rows=1 width=224)
+ Output:["_col0"],aggregations:["max(_col1)"]
+ Select Operator [SEL_201] (rows=348477374 width=88)
+ Output:["_col1"]
+ Group By Operator [GBY_200] (rows=348477374 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 54 [SIMPLE_EDGE]
+ SHUFFLE [RS_199]
+ PartitionCols:_col0
+ Group By Operator [GBY_198] (rows=696954748 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Select Operator [SEL_196] (rows=696954748 width=88)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_363] (rows=696954748 width=88)
+ Conds:RS_193._col1=RS_194._col0(Inner),Output:["_col2","_col3","_col6"]
+ <-Map 58 [SIMPLE_EDGE]
+ SHUFFLE [RS_194]
+ PartitionCols:_col0
+ Select Operator [SEL_189] (rows=80000000 width=860)
+ Output:["_col0"]
+ Filter Operator [FIL_344] (rows=80000000 width=860)
+ predicate:c_customer_sk is not null
+ TableScan [TS_187] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
+ <-Reducer 53 [SIMPLE_EDGE]
+ SHUFFLE [RS_193]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_362] (rows=633595212 width=88)
+ Conds:RS_190._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 52 [SIMPLE_EDGE]
+ SHUFFLE [RS_190]
+ PartitionCols:_col0
+ Select Operator [SEL_183] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_342] (rows=575995635 width=88)
+ predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
+ TableScan [TS_181] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"]
+ <-Map 57 [SIMPLE_EDGE]
+ SHUFFLE [RS_191]
+ PartitionCols:_col0
+ Select Operator [SEL_186] (rows=36525 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_343] (rows=36525 width=1119)
+ predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
+ TableScan [TS_184] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+ <-Reducer 61 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_224]
+ Group By Operator [GBY_220] (rows=316797606 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 60 [SIMPLE_EDGE]
+ SHUFFLE [RS_219]
+ PartitionCols:_col0
+ Group By Operator [GBY_218] (rows=633595212 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Select Operator [SEL_216] (rows=633595212 width=88)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_364] (rows=633595212 width=88)
+ Conds:RS_213._col0=RS_214._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 59 [SIMPLE_EDGE]
+ SHUFFLE [RS_213]
+ PartitionCols:_col0
+ Select Operator [SEL_209] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_345] (rows=575995635 width=88)
+ predicate:ss_customer_sk is not null
+ TableScan [TS_207] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"]
+ <-Map 62 [SIMPLE_EDGE]
+ SHUFFLE [RS_214]
+ PartitionCols:_col0
+ Select Operator [SEL_212] (rows=80000000 width=860)
+ Output:["_col0"]
+ Filter Operator [FIL_346] (rows=80000000 width=860)
+ predicate:c_customer_sk is not null
+ TableScan [TS_210] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
<-Reducer 4 [CONTAINS]
- Reduce Output Operator [RS_254]
- Group By Operator [GBY_253] (rows=1 width=112)
+ Reduce Output Operator [RS_242]
+ Group By Operator [GBY_241] (rows=1 width=112)
Output:["_col0"],aggregations:["sum(_col0)"]
- Select Operator [SEL_124] (rows=383314495 width=135)
+ Select Operator [SEL_118] (rows=383314495 width=135)
Output:["_col0"]
- Merge Join Operator [MERGEJOIN_381] (rows=383314495 width=135)
- Conds:RS_121._col1=RS_122._col0(Inner),Output:["_col3","_col4"]
- <-Reducer 21 [SIMPLE_EDGE]
- SHUFFLE [RS_122]
+ Merge Join Operator [MERGEJOIN_368] (rows=383314495 width=135)
+ Conds:RS_115._col1=RS_116._col0(Inner),Output:["_col3","_col4"]
+ <-Reducer 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_116]
PartitionCols:_col0
- Group By Operator [GBY_113] (rows=52799601 width=322)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 20 [SIMPLE_EDGE]
- SHUFFLE [RS_112]
- PartitionCols:_col0
- Group By Operator [GBY_111] (rows=105599202 width=322)
- Output:["_col0"],keys:_col2
- Select Operator [SEL_110] (rows=105599202 width=322)
- Output:["_col2"]
- Filter Operator [FIL_109] (rows=105599202 width=322)
- predicate:(_col3 > (0.95 * _col1))
- Merge Join Operator [MERGEJOIN_380] (rows=316797606 width=322)
- Conds:(Inner),Output:["_col1","_col2","_col3"]
- <-Reducer 19 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_106]
- Merge Join Operator [MERGEJOIN_379] (rows=1 width=233)
- Conds:(Left Outer),Output:["_col1"]
- <-Reducer 18 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_103]
- Select Operator [SEL_61] (rows=1 width=8)
- Filter Operator [FIL_60] (rows=1 width=8)
- predicate:(sq_count_check(_col0) <= 1)
- Group By Operator [GBY_58] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Reducer 17 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_57]
- Group By Operator [GBY_56] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_53] (rows=348477374 width=88)
- Group By Operator [GBY_52] (rows=348477374 width=88)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 16 [SIMPLE_EDGE]
- SHUFFLE [RS_51]
- PartitionCols:_col0
- Group By Operator [GBY_50] (rows=696954748 width=88)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_48] (rows=696954748 width=88)
- Output:["_col0"]
- Merge Join Operator [MERGEJOIN_365] (rows=696954748 width=88)
- Conds:RS_45._col1=RS_46._col0(Inner),Output:["_col6"]
- <-Map 23 [SIMPLE_EDGE]
- SHUFFLE [RS_46]
- PartitionCols:_col0
- Select Operator [SEL_41] (rows=80000000 width=860)
- Output:["_col0"]
- Filter Operator [FIL_340] (rows=80000000 width=860)
- predicate:c_customer_sk is not null
- TableScan [TS_39] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
- <-Reducer 15 [SIMPLE_EDGE]
- SHUFFLE [RS_45]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_364] (rows=633595212 width=88)
- Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1"]
- <-Map 14 [SIMPLE_EDGE]
- SHUFFLE [RS_42]
- PartitionCols:_col0
- Select Operator [SEL_35] (rows=575995635 width=88)
- Output:["_col0","_col1"]
- Filter Operator [FIL_338] (rows=575995635 width=88)
- predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
- TableScan [TS_33] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
- <-Map 22 [SIMPLE_EDGE]
- SHUFFLE [RS_43]
- PartitionCols:_col0
- Select Operator [SEL_38] (rows=36525 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_339] (rows=36525 width=1119)
- predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
- TableScan [TS_36] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
- <-Reducer 28 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_104]
- Group By Operator [GBY_86] (rows=1 width=224)
- Output:["_col0"],aggregations:["max(VALUE._col0)"]
- <-Reducer 27 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_85]
- Group By Operator [GBY_84] (rows=1 width=224)
- Output:["_col0"],aggregations:["max(_col1)"]
- Select Operator [SEL_82] (rows=348477374 width=88)
- Output:["_col1"]
- Group By Operator [GBY_81] (rows=348477374 width=88)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 26 [SIMPLE_EDGE]
- SHUFFLE [RS_80]
- PartitionCols:_col0
- Group By Operator [GBY_79] (rows=696954748 width=88)
- Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
- Select Operator [SEL_77] (rows=696954748 width=88)
- Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_367] (rows=696954748 width=88)
- Conds:RS_74._col1=RS_75._col0(Inner),Output:["_col2","_col3","_col6"]
- <-Map 30 [SIMPLE_EDGE]
- SHUFFLE [RS_75]
- PartitionCols:_col0
- Select Operator [SEL_70] (rows=80000000 width=860)
- Output:["_col0"]
- Filter Operator [FIL_343] (rows=80000000 width=860)
- predicate:c_customer_sk is not null
- TableScan [TS_68] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
- <-Reducer 25 [SIMPLE_EDGE]
- SHUFFLE [RS_74]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_366] (rows=633595212 width=88)
- Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2","_col3"]
- <-Map 24 [SIMPLE_EDGE]
- SHUFFLE [RS_71]
- PartitionCols:_col0
- Select Operator [SEL_64] (rows=575995635 width=88)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_341] (rows=575995635 width=88)
- predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
- TableScan [TS_62] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"]
- <-Map 29 [SIMPLE_EDGE]
- SHUFFLE [RS_72]
- PartitionCols:_col0
- Select Operator [SEL_67] (rows=36525 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_342] (rows=36525 width=1119)
- predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
- TableScan [TS_65] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
- <-Reducer 33 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_107]
- Group By Operator [GBY_101] (rows=316797606 width=88)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 32 [SIMPLE_EDGE]
- SHUFFLE [RS_100]
- PartitionCols:_col0
- Group By Operator [GBY_99] (rows=633595212 width=88)
- Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
- Select Operator [SEL_97] (rows=633595212 width=88)
- Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_368] (rows=633595212 width=88)
- Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col2","_col3"]
- <-Map 31 [SIMPLE_EDGE]
- SHUFFLE [RS_94]
- PartitionCols:_col0
- Select Operator [SEL_90] (rows=575995635 width=88)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_344] (rows=575995635 width=88)
- predicate:ss_customer_sk is not null
- TableScan [TS_88] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"]
- <-Map 34 [SIMPLE_EDGE]
- SHUFFLE [RS_95]
- PartitionCols:_col0
- Select Operator [SEL_93] (rows=80000000 width=860)
+ Select Operator [SEL_108] (rows=105599202 width=321)
+ Output:["_col0"]
+ Filter Operator [FIL_107] (rows=105599202 width=321)
+ predicate:(_col3 > (0.95 * _col1))
+ Merge Join Operator [MERGEJOIN_367] (rows=316797606 width=321)
+ Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"]
+ <-Reducer 18 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_103]
+ Select Operator [SEL_61] (rows=1 width=8)
+ Filter Operator [FIL_60] (rows=1 width=8)
+ predicate:(sq_count_check(_col0) <= 1)
+ Group By Operator [GBY_58] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 17 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_57]
+ Group By Operator [GBY_56] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Select Operator [SEL_53] (rows=348477374 width=88)
+ Group By Operator [GBY_52] (rows=348477374 width=88)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_51]
+ PartitionCols:_col0
+ Group By Operator [GBY_50] (rows=696954748 width=88)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_48] (rows=696954748 width=88)
Output:["_col0"]
- Filter Operator [FIL_345] (rows=80000000 width=860)
- predicate:c_customer_sk is not null
- TableScan [TS_91] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
+ Merge Join Operator [MERGEJOIN_353] (rows=696954748 width=88)
+ Conds:RS_45._col1=RS_46._col0(Inner),Output:["_col6"]
+ <-Map 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_46]
+ PartitionCols:_col0
+ Select Operator [SEL_41] (rows=80000000 width=860)
+ Output:["_col0"]
+ Filter Operator [FIL_328] (rows=80000000 width=860)
+ predicate:c_customer_sk is not null
+ TableScan [TS_39] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
+ <-Reducer 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_45]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_352] (rows=633595212 width=88)
+ Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1"]
+ <-Map 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_42]
+ PartitionCols:_col0
+ Select Operator [SEL_35] (rows=575995635 width=88)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_326] (rows=575995635 width=88)
+ predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
+ TableScan [TS_33] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
+ <-Map 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_43]
+ PartitionCols:_col0
+ Select Operator [SEL_38] (rows=36525 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_327] (rows=36525 width=1119)
+ predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
+ TableScan [TS_36] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+ <-Reducer 26 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_104]
+ Group By Operator [GBY_86] (rows=1 width=224)
+ Output:["_col0"],aggregations:["max(VALUE._col0)"]
+ <-Reducer 25 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_85]
+ Group By Operator [GBY_84] (rows=1 width=224)
+ Output:["_col0"],aggregations:["max(_col1)"]
+ Select Operator [SEL_82] (rows=348477374 width=88)
+ Output:["_col1"]
+ Group By Operator [GBY_81] (rows=348477374 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 24 [SIMPLE_EDGE]
+ SHUFFLE [RS_80]
+ PartitionCols:_col0
+ Group By Operator [GBY_79] (rows=696954748 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Select Operator [SEL_77] (rows=696954748 width=88)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_355] (rows=696954748 width=88)
+ Conds:RS_74._col1=RS_75._col0(Inner),Output:["_col2","_col3","_col6"]
+ <-Map 28 [SIMPLE_EDGE]
+ SHUFFLE [RS_75]
+ PartitionCols:_col0
+ Select Operator [SEL_70] (rows=80000000 width=860)
+ Output:["_col0"]
+ Filter Operator [FIL_331] (rows=80000000 width=860)
+ predicate:c_customer_sk is not null
+ TableScan [TS_68] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
+ <-Reducer 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_74]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_354] (rows=633595212 width=88)
+ Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_71]
+ PartitionCols:_col0
+ Select Operator [SEL_64] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_329] (rows=575995635 width=88)
+ predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
+ TableScan [TS_62] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"]
+ <-Map 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_72]
+ PartitionCols:_col0
+ Select Operator [SEL_67] (rows=36525 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_330] (rows=36525 width=1119)
+ predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
+ TableScan [TS_65] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+ <-Reducer 31 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_105]
+ Group By Operator [GBY_101] (rows=316797606 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 30 [SIMPLE_EDGE]
+ SHUFFLE [RS_100]
+ PartitionCols:_col0
+ Group By Operator [GBY_99] (rows=633595212 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Select Operator [SEL_97] (rows=633595212 width=88)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_356] (rows=633595212 width=88)
+ Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 29 [SIMPLE_EDGE]
+ SHUFFLE [RS_94]
+ PartitionCols:_col0
+ Select Operator [SEL_90] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_332] (rows=575995635 width=88)
+ predicate:ss_customer_sk is not null
+ TableScan [TS_88] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"]
+ <-Map 32 [SIMPLE_EDGE]
+ SHUFFLE [RS_95]
+ PartitionCols:_col0
+ Select Operator [SEL_93] (rows=80000000 width=860)
+ Output:["_col0"]
+ Filter Operator [FIL_333] (rows=80000000 width=860)
+ predicate:c_customer_sk is not null
+ TableScan [TS_91] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
<-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_121]
+ SHUFFLE [RS_115]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_377] (rows=348467716 width=135)
- Conds:RS_118._col2=RS_119._col0(Inner),Output:["_col1","_col3","_col4"]
+ Merge Join Operator [MERGEJOIN_365] (rows=348467716 width=135)
+ Conds:RS_112._col2=RS_113._col0(Inner),Output:["_col1","_col3","_col4"]
<-Reducer 11 [SIMPLE_EDGE]
- SHUFFLE [RS_119]
+ SHUFFLE [RS_113]
PartitionCols:_col0
Group By Operator [GBY_31] (rows=58079562 width=88)
Output:["_col0"],keys:_col1
@@ -586,7 +558,7 @@ Stage-0
Output:["_col1"]
Filter Operator [FIL_26] (rows=116159124 width=88)
predicate:(_col3 > 4)
- Select Operator [SEL_359] (rows=348477374 width=88)
+ Select Operator [SEL_347] (rows=348477374 width=88)
Output:["_col0","_col3"]
Group By Operator [GBY_25] (rows=348477374 width=88)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
@@ -597,28 +569,28 @@ Stage-0
Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2
Select Operator [SEL_21] (rows=696954748 width=88)
Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_363] (rows=696954748 width=88)
+ Merge Join Operator [MERGEJOIN_351] (rows=696954748 width=88)
Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col5","_col6"]
<-Map 13 [SIMPLE_EDGE]
SHUFFLE [RS_19]
PartitionCols:_col0
Select Operator [SEL_14] (rows=462000 width=1436)
Output:["_col0","_col1"]
- Filter Operator [FIL_337] (rows=462000 width=1436)
+ Filter Operator [FIL_325] (rows=462000 width=1436)
predicate:i_item_sk is not null
TableScan [TS_12] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"]
<-Reducer 9 [SIMPLE_EDGE]
SHUFFLE [RS_18]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_362] (rows=633595212 width=88)
+ Merge Join Operator [MERGEJOIN_350] (rows=633595212 width=88)
Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col3"]
<-Map 12 [SIMPLE_EDGE]
SHUFFLE [RS_16]
PartitionCols:_col0
Select Operator [SEL_11] (rows=36525 width=1119)
Output:["_col0","_col1"]
- Filter Operator [FIL_336] (rows=36525 width=1119)
+ Filter Operator [FIL_324] (rows=36525 width=1119)
predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
TableScan [TS_9] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"]
@@ -627,30 +599,30 @@ Stage-0
PartitionCols:_col0
Select Operator [SEL_8] (rows=575995635 width=88)
Output:["_col0","_col1"]
- Filter Operator [FIL_335] (rows=575995635 width=88)
+ Filter Operator [FIL_323] (rows=575995635 width=88)
predicate:(ss_sold_date_sk is not null and ss_item_sk is not null)
TableScan [TS_6] (rows=575995635 width=88)
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"]
<-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_118]
+ SHUFFLE [RS_112]
PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_361] (rows=316788826 width=135)
- Conds:RS_115._col0=RS_116._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
+ Merge Join Operator [MERGEJOIN_349] (rows=316788826 width=135)
+ Conds:RS_109._col0=RS_110._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
<-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_115]
+ SHUFFLE [RS_109]
PartitionCols:_col0
Select Operator [SEL_2] (rows=287989836 width=135)
Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_333] (rows=287989836 width=135)
+ Filter Operator [FIL_321] (rows=287989836 width=135)
predicate:(cs_item_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null)
TableScan [TS_0] (rows=287989836 width=135)
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"]
<-Map 7 [SIMPLE_EDGE]
- SHUFFLE [RS_116]
+ SHUFFLE [RS_110]
PartitionCols:_col0
Select Operator [SEL_5] (rows=18262 width=1119)
Output:["_col0"]
- Filter Operator [FIL_334] (rows=18262 width=1119)
+ Filter Operator [FIL_322] (rows=18262 width=1119)
predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
diff --git a/ql/src/test/results/clientpositive/router_join_ppr.q.out b/ql/src/test/results/clientpositive/router_join_ppr.q.out
index cc2b07e..f17959b 100644
--- a/ql/src/test/results/clientpositive/router_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/router_join_ppr.q.out
@@ -30,41 +30,41 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
tag: 0
value expressions: _col1 (type: string)
auto parallelism: false
TableScan
alias: b
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string), ds (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
tag: 1
- value expressions: _col1 (type: string), _col2 (type: string)
+ value expressions: _col1 (type: string)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -211,149 +211,42 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
-#### A masked pattern was here ####
- Partition
- base file name: hr=11
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 11
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
- bucket_count -1
- column.name.delimiter ,
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- numFiles 1
- numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
- rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- column.name.delimiter ,
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
-#### A masked pattern was here ####
- Partition
- base file name: hr=12
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 12
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
- bucket_count -1
- column.name.delimiter ,
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- numFiles 1
- numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
- rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- column.name.delimiter ,
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
Truncated Path -> Alias:
/src [$hdt$_0:a]
/srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b]
/srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b]
- /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b]
- /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b]
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
- filter mappings:
- 1 [0, 1]
- filter predicates:
- 0
- 1 {(VALUE._col1 = '2008-04-08')}
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types string:string:string:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -373,8 +266,6 @@ PREHOOK: Input: default@src
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: FROM
src a
@@ -388,8 +279,6 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
17 val_17 17 val_17
17 val_17 17 val_17
@@ -435,18 +324,18 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
tag: 0
value expressions: _col1 (type: string)
auto parallelism: false
@@ -456,18 +345,18 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
tag: 1
value expressions: _col1 (type: string)
auto parallelism: false
@@ -624,42 +513,38 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col3, _col4
- Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types string:string:string:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -737,18 +622,18 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
tag: 0
value expressions: _col1 (type: string)
auto parallelism: false
@@ -758,18 +643,18 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0) and (UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
tag: 1
value expressions: _col1 (type: string)
auto parallelism: false
@@ -926,38 +811,34 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 13 Data size: 139 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types string:string:string:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
index d2a879d..84b9250 100644
--- a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
@@ -202,7 +202,7 @@ STAGE PLANS:
alias: vectortab_b_1korc
Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
+ predicate: (s is not null and dt is not null and ts is not null) (type: boolean)
Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
@@ -226,8 +226,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 14)(children: DateColSubtractDateColumn(col 12, col 13)(children: CastTimestampToDate(col 10) -> 13:date) -> 14:timestamp) -> boolean) -> boolean
- predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 12) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean
+ predicate: (s is not null and dt is not null and ts is not null) (type: boolean)
Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)