diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java index 5a48d0f0dc..3e2a1711f6 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java @@ -90,7 +90,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.druid.serde.HiveDruidSerializationModule; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; @@ -195,9 +194,6 @@ JSON_MAPPER.setInjectableValues(injectableValues); SMILE_MAPPER.setInjectableValues(injectableValues); - HiveDruidSerializationModule hiveDruidSerializationModule = new HiveDruidSerializationModule(); - JSON_MAPPER.registerModule(hiveDruidSerializationModule); - SMILE_MAPPER.registerModule(hiveDruidSerializationModule); // Register the shard sub type to be used by the mapper JSON_MAPPER.registerSubtypes(new NamedType(LinearShardSpec.class, "linear")); JSON_MAPPER.registerSubtypes(new NamedType(NumberedShardSpec.class, "numbered")); diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java index df9049ea02..20ef0ced5b 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java @@ -99,7 +99,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.hadoop.hive.druid.serde.DruidSerDeUtils.TIMESTAMP_FORMAT; import static org.joda.time.format.ISODateTimeFormat.dateOptionalTimeParser; /** @@ -347,7 
+346,7 @@ protected SegmentAnalysis submitMetadataRequest(String address, SegmentMetadataQ break; case BOOLEAN: res = ((BooleanObjectInspector) fields.get(i).getFieldObjectInspector()) - .get(values.get(i)); + .get(values.get(i)) ? 1L : 0L; break; default: throw new SerDeException("Unsupported type: " + types[i].getPrimitiveCategory()); @@ -394,40 +393,22 @@ protected SegmentAnalysis submitMetadataRequest(String address, SegmentMetadataQ continue; } switch (types[i].getPrimitiveCategory()) { - case TIMESTAMP: - if (value instanceof Number) { - output.add(new TimestampWritableV2(Timestamp.valueOf( - ZonedDateTime.ofInstant(Instant.ofEpochMilli(((Number) value).longValue()), tsTZTypeInfo.timeZone()) - .format(DateTimeFormatter.ofPattern(TIMESTAMP_FORMAT))))); - } else { - output.add(new TimestampWritableV2(Timestamp.valueOf((String) value))); - } - - break; - case TIMESTAMPLOCALTZ: - final long numberOfMillis; - if (value instanceof Number) { - numberOfMillis = ((Number) value).longValue(); - } else { - // it is an extraction fn need to be parsed - numberOfMillis = dateOptionalTimeParser().parseDateTime((String) value).getMillis(); - } - output.add(new TimestampLocalTZWritable(new TimestampTZ(ZonedDateTime - .ofInstant(Instant.ofEpochMilli(numberOfMillis), - ((TimestampLocalTZTypeInfo) types[i]).timeZone() - )))); - break; - case DATE: - final DateWritableV2 dateWritable; - if (value instanceof Number) { - dateWritable = new DateWritableV2( - Date.ofEpochMilli((((Number) value).longValue()))); - } else { - // it is an extraction fn need to be parsed - dateWritable = new DateWritableV2( - Date.ofEpochMilli(dateOptionalTimeParser().parseDateTime((String) value).getMillis())); - } - output.add(dateWritable); + case TIMESTAMP: + output.add(new TimestampWritableV2( + Timestamp.ofEpochMilli(deserializeToMillis(value)))); + break; + case TIMESTAMPLOCALTZ: + output.add(new TimestampLocalTZWritable( + new TimestampTZ( + ZonedDateTime + .ofInstant( + 
Instant.ofEpochMilli(deserializeToMillis(value)), + ((TimestampLocalTZTypeInfo) types[i]).timeZone() + )))); + break; + case DATE: + output.add(new DateWritableV2( + Date.ofEpochMilli(deserializeToMillis(value)))); break; case BYTE: output.add(new ByteWritable(((Number) value).byteValue())); @@ -478,6 +459,18 @@ protected SegmentAnalysis submitMetadataRequest(String address, SegmentMetadataQ return output; } + private long deserializeToMillis(Object value) + { + long numberOfMillis; + if (value instanceof Number) { + numberOfMillis = ((Number) value).longValue(); + } else { + // it comes from an extraction fn and needs to be parsed + numberOfMillis = dateOptionalTimeParser().parseDateTime((String) value).getMillis(); + } + return numberOfMillis; + } + @Override public ObjectInspector getObjectInspector() { return inspector; } diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDeUtils.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDeUtils.java index 630e097c19..c04f2dcbe1 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDeUtils.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDeUtils.java @@ -29,9 +29,6 @@ private static final Logger LOG = LoggerFactory.getLogger(DruidSerDeUtils.class); - protected static final String ISO_TIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; - protected static final String TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss"; - protected static final String FLOAT_TYPE = "FLOAT"; protected static final String DOUBLE_TYPE = "DOUBLE"; protected static final String LONG_TYPE = "LONG"; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/HiveDruidSerializationModule.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/HiveDruidSerializationModule.java deleted file mode 100644 index 8a110ae6e9..0000000000 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/HiveDruidSerializationModule.java +++ /dev/null @@ -1,40 +0,0 @@ 
-/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.druid.serde; - -import io.druid.java.util.common.granularity.PeriodGranularity; -import io.druid.query.spec.LegacySegmentSpec; - -import com.fasterxml.jackson.core.util.VersionUtil; -import com.fasterxml.jackson.databind.module.SimpleModule; - -import org.joda.time.Interval; - -/** - * This class is used to define/override any serde behavior for classes from druid. - * Currently it is used to override the default behavior when serializing PeriodGranularity to include user timezone. 
- */ -public class HiveDruidSerializationModule extends SimpleModule { - private static final String NAME = "HiveDruidSerializationModule"; - private static final VersionUtil VERSION_UTIL = new VersionUtil() {}; - - public HiveDruidSerializationModule() { - super(NAME, VERSION_UTIL.version()); - addSerializer(PeriodGranularity.class, new PeriodGranularitySerializer()); - } -} \ No newline at end of file diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/PeriodGranularitySerializer.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/PeriodGranularitySerializer.java deleted file mode 100644 index 10f91729e8..0000000000 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/PeriodGranularitySerializer.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.druid.serde; - -import io.druid.java.util.common.granularity.PeriodGranularity; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.jsontype.TypeSerializer; - -import org.joda.time.DateTimeZone; - -import java.io.IOException; - -public class PeriodGranularitySerializer extends JsonSerializer { - - @Override - public void serialize(PeriodGranularity granularity, JsonGenerator jsonGenerator, - SerializerProvider serializerProvider) throws IOException, JsonProcessingException { - // Set timezone based on user timezone if origin is not already set - // as it is default Hive time semantics to consider user timezone. - PeriodGranularity granularityWithUserTimezone = new PeriodGranularity( - granularity.getPeriod(), - granularity.getOrigin(), - DateTimeZone.getDefault() - ); - granularityWithUserTimezone.serialize(jsonGenerator, serializerProvider); - } - - @Override - public void serializeWithType(PeriodGranularity value, JsonGenerator gen, - SerializerProvider serializers, TypeSerializer typeSer) throws IOException { - serialize(value, gen, serializers); - } -} - - diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index b5ae3905e2..918a725ba4 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1699,6 +1699,7 @@ spark.perf.disabled.query.files=query14.q,\ query64.q druid.query.files=druidmini_test1.q,\ + druidmini_test_ts.q,\ druid_basic2.q,\ druidmini_joins.q,\ druidmini_test_insert.q,\ diff --git a/jdbc-handler/pom.xml b/jdbc-handler/pom.xml index e892708a90..f90892f127 100644 --- a/jdbc-handler/pom.xml +++ b/jdbc-handler/pom.xml @@ -86,6 +86,12 @@ + + commons-dbcp + 
commons-dbcp + ${commons-dbcp.version} + + org.hamcrest hamcrest-all diff --git a/pom.xml b/pom.xml index 79b5ee6126..d92bd1f884 100644 --- a/pom.xml +++ b/pom.xml @@ -122,10 +122,10 @@ 0.1 0.8.0 - 1.11.0 + 1.12.0 1.8.2 0.8.0.RELEASE - 1.16.0 + 1.17.0 4.2.4 4.1.17 4.1.19 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java index 0ce359f4a2..4297537adb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java @@ -267,6 +267,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Lists.newArrayList(fsParent.getSchema().getSignature()); final ArrayList descs = Lists.newArrayList(); final List colNames = Lists.newArrayList(); + PrimitiveCategory timestampType = null; int timestampPos = -1; for (int i = 0; i < parentCols.size(); i++) { ColumnInfo ci = parentCols.get(i); @@ -274,11 +275,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, descs.add(columnDesc); colNames.add(columnDesc.getExprString()); if (columnDesc.getTypeInfo().getCategory() == ObjectInspector.Category.PRIMITIVE - && ((PrimitiveTypeInfo) columnDesc.getTypeInfo()).getPrimitiveCategory() == PrimitiveCategory.TIMESTAMPLOCALTZ) { + && (((PrimitiveTypeInfo) columnDesc.getTypeInfo()).getPrimitiveCategory() == PrimitiveCategory.TIMESTAMP || + ((PrimitiveTypeInfo) columnDesc.getTypeInfo()).getPrimitiveCategory() == PrimitiveCategory.TIMESTAMPLOCALTZ)) { if (timestampPos != -1) { - throw new SemanticException("Multiple columns with timestamp with local time-zone type on query result; " - + "could not resolve which one is the timestamp with local time-zone column"); + throw new SemanticException("Multiple columns with timestamp/timestamp with local time-zone type on 
query result; " + + "could not resolve which one is the right column"); } + timestampType = ((PrimitiveTypeInfo) columnDesc.getTypeInfo()).getPrimitiveCategory(); timestampPos = i; } } @@ -327,8 +330,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } - // Timestamp column type in Druid is timestamp with local time-zone, as it represents - // a specific instant in time. Thus, we have this value and we need to extract the + // Timestamp column type in Druid is either timestamp or timestamp with local time-zone, i.e., + // a specific instant in time. Thus, for the latter, we have this value and we need to extract the // granularity to split the data when we are storing it in Druid. However, Druid stores // the data in UTC. Thus, we need to apply the following logic on the data to extract // the granularity correctly: @@ -341,18 +344,20 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // #1 - Read the column value ExprNodeDesc expr = new ExprNodeColumnDesc(parentCols.get(timestampPos)); - // #2 - UTC epoch for instant - ExprNodeGenericFuncDesc f1 = new ExprNodeGenericFuncDesc( - TypeInfoFactory.longTypeInfo, new GenericUDFEpochMilli(), Lists.newArrayList(expr)); - // #3 - Cast to timestamp - ExprNodeGenericFuncDesc f2 = new ExprNodeGenericFuncDesc( - TypeInfoFactory.timestampTypeInfo, new GenericUDFTimestamp(), Lists.newArrayList(f1)); + if (timestampType == PrimitiveCategory.TIMESTAMPLOCALTZ) { + // #2 - UTC epoch for instant + expr = new ExprNodeGenericFuncDesc( + TypeInfoFactory.longTypeInfo, new GenericUDFEpochMilli(), Lists.newArrayList(expr)); + // #3 - Cast to timestamp + expr = new ExprNodeGenericFuncDesc( + TypeInfoFactory.timestampTypeInfo, new GenericUDFTimestamp(), Lists.newArrayList(expr)); + } // #4 - We apply the granularity function - ExprNodeGenericFuncDesc f3 = new ExprNodeGenericFuncDesc( + expr = new ExprNodeGenericFuncDesc( TypeInfoFactory.timestampTypeInfo, new GenericUDFBridge(udfName, false, 
udfClass.getName()), - Lists.newArrayList(f2)); - descs.add(f3); + Lists.newArrayList(expr)); + descs.add(expr); colNames.add(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME); // Add granularity to the row schema final ColumnInfo ci = new ColumnInfo(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, TypeInfoFactory.timestampTypeInfo, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index a09e7f6036..f43ef01293 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -52,7 +52,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; -import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -223,11 +222,7 @@ public void onMatch(RelOptRuleCall call) { // Update condition final Mapping mapping = (Mapping) Mappings.target( - new Function() { - public Integer apply(Integer a0) { - return map.get(a0); - } - }, + map::get, join.getRowType().getFieldCount(), belowOffset); final RexNode newCondition = diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index d950991a4c..6dd00189d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import 
org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.Hive; @@ -323,7 +324,7 @@ public ExprNodeDesc visitLiteral(RexLiteral literal) { // Calcite stores timestamp with local time-zone in UTC internally, thus // when we bring it back, we need to add the UTC suffix. return new ExprNodeConstantDesc(TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), - literal.getValueAs(TimestampString.class).toString() + " UTC"); + TimestampTZUtil.parse(literal.getValueAs(TimestampString.class).toString() + " UTC")); case BINARY: return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); case DECIMAL: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java index 6aa98c08c4..ece6e774ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java @@ -38,6 +38,7 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveDateAddSqlOperator; @@ -178,10 +179,11 @@ private DruidSqlOperatorConverter() { return null; } if (SqlTypeUtil.isDatetime(call.getOperands().get(0).getType())) { + final TimeZone tz = timezoneId(query, call.getOperands().get(0)); return applyTimestampFormat( - DruidExpressions.applyTimestampFloor(arg, Period.days(1).toString(), "", 
timezoneId(query)), YYYY_MM_DD, - timezoneId(query) - ); + DruidExpressions.applyTimestampFloor(arg, Period.days(1).toString(), "", tz), + YYYY_MM_DD, + tz); } return null; } else if (call.getOperands().size() == 2) { @@ -207,9 +209,11 @@ private DruidSqlOperatorConverter() { //bail out can not infer unit return null; } - return applyTimestampFormat(DruidExpressions.applyTimestampFloor(arg, unit, "", timezoneId(query)), YYYY_MM_DD, - timezoneId(query) - ); + final TimeZone tz = timezoneId(query, call.getOperands().get(0)); + return applyTimestampFormat( + DruidExpressions.applyTimestampFloor(arg, unit, "", tz), + YYYY_MM_DD, + tz); } return null; } @@ -235,7 +239,11 @@ private DruidSqlOperatorConverter() { if (arg == null) { return null; } - return DruidExpressions.applyTimestampFloor(arg, Period.days(1).toString(), "", timezoneId(query)); + return DruidExpressions.applyTimestampFloor( + arg, + Period.days(1).toString(), + "", + timezoneId(query, call.getOperands().get(0))); } } @@ -288,7 +296,7 @@ private DruidSqlOperatorConverter() { call.getOperands().size() == 1 ? DruidExpressions.stringLiteral(DEFAULT_TS_FORMAT) : DruidExpressions .toDruidExpression(call.getOperands().get(1), rowType, query); return DruidExpressions.functionCall("timestamp_format", - ImmutableList.of(numMillis, format, DruidExpressions.stringLiteral(timezoneId(query).getID())) + ImmutableList.of(numMillis, format, DruidExpressions.stringLiteral(TimeZone.getTimeZone("UTC").getID())) ); } } @@ -325,10 +333,13 @@ public DruidDateArithmeticOperatorConversion(int direction, SqlOperator operator } final String steps = direction == -1 ? 
DruidQuery.format("-( %s )", arg1) : arg1; - return DruidExpressions.functionCall("timestamp_shift", ImmutableList - .of(arg0, DruidExpressions.stringLiteral("P1D"), steps, - DruidExpressions.stringLiteral(timezoneId(query).getID()) - )); + return DruidExpressions.functionCall( + "timestamp_shift", + ImmutableList.of( + arg0, + DruidExpressions.stringLiteral("P1D"), + steps, + DruidExpressions.stringLiteral(timezoneId(query, call.getOperands().get(0)).getID()))); } } @@ -337,9 +348,11 @@ public DruidDateArithmeticOperatorConversion(int direction, SqlOperator operator * @param query Druid Rel * @return time zone */ - private static TimeZone timezoneId(final DruidQuery query) { - return TimeZone.getTimeZone( - query.getTopNode().getCluster().getPlanner().getContext().unwrap(CalciteConnectionConfig.class).timeZone()); + private static TimeZone timezoneId(final DruidQuery query, final RexNode arg) { + return arg.getType().getSqlTypeName() == SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE + ? TimeZone.getTimeZone( + query.getTopNode().getCluster().getPlanner().getContext().unwrap(CalciteConnectionConfig.class).timeZone()) : + TimeZone.getTimeZone("UTC"); } private static String applyTimestampFormat(String arg, String format, TimeZone timeZone) { diff --git a/ql/src/test/queries/clientpositive/druid_timestamptz.q b/ql/src/test/queries/clientpositive/druid_timestamptz.q index 605d240ae2..b21ca9dcf9 100644 --- a/ql/src/test/queries/clientpositive/druid_timestamptz.q +++ b/ql/src/test/queries/clientpositive/druid_timestamptz.q @@ -11,6 +11,14 @@ TBLPROPERTIES ("druid.segment.granularity" = "HOUR"); insert into table tstz1_n0 values(cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone), 'Bill', 10); +-- Create table with druid time column as timestamp +create table tstz1_n1(`__time` timestamp, n string, v integer) +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR"); + +insert into table 
tstz1_n1 +values(cast('2016-01-03 12:26:34' as timestamp), 'Bill', 10); + EXPLAIN select `__time` from tstz1_n0; select `__time` from tstz1_n0; @@ -26,7 +34,22 @@ SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n0; EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n0; SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n0; +EXPLAIN SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time`; +SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time`; + +EXPLAIN select `__time` from tstz1_n1; +select `__time` from tstz1_n1; + +EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1; +SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1; + +EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1; +SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1; + +EXPLAIN SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time`; +SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time`; +-- Change timezone to UTC and test again set time zone UTC; EXPLAIN select `__time` from tstz1_n0; select `__time` from tstz1_n0; @@ -50,3 +73,20 @@ SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n0; EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n0; SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n0; + +EXPLAIN SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time`; +SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time`; + +EXPLAIN select `__time` from tstz1_n1; +select `__time` from tstz1_n1; + +EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1; +SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1; + +EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1; +SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1; + +EXPLAIN SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time`; +SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time`; + + diff --git 
a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q index 7857973611..273c803154 100644 --- a/ql/src/test/queries/clientpositive/druidmini_expressions.q +++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q @@ -127,12 +127,15 @@ explain SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) a SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1, DATE_SUB(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_2 from druid_table_n0 order by date_1, date_2 limit 3; -- Boolean Values +-- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319 +-- It should get fixed once we upgrade calcite + EXPLAIN SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2; SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2; - + -- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319 -- It should get fixed once we upgrade calcite SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2; - + EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2; DROP TABLE druid_table_n0; diff --git a/ql/src/test/queries/clientpositive/druidmini_test_ts.q b/ql/src/test/queries/clientpositive/druidmini_test_ts.q new file mode 100644 index 0000000000..9e45ae601e --- /dev/null +++ b/ql/src/test/queries/clientpositive/druidmini_test_ts.q @@ -0,0 +1,64 @@ +--! 
qt:dataset:alltypesorc +CREATE TABLE druid_table_test_ts +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT `ctimestamp1` as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL; + +-- Time Series Query +SELECT count(*) FROM druid_table_test_ts; + +SELECT floor_year(`__time`), SUM(cfloat), SUM(cdouble), SUM(ctinyint), SUM(csmallint),SUM(cint), SUM(cbigint) +FROM druid_table_test_ts GROUP BY floor_year(`__time`); + +SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) +FROM druid_table_test_ts GROUP BY floor_year(`__time`); + +SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) +FROM druid_table_test_ts GROUP BY floor_year(`__time`); + + +-- Group By + +SELECT cstring1, SUM(cdouble) as s FROM druid_table_test_ts GROUP BY cstring1 ORDER BY s ASC LIMIT 10; + +SELECT cstring2, MAX(cdouble) FROM druid_table_test_ts GROUP BY cstring2 ORDER BY cstring2 ASC LIMIT 10; + + +-- TIME STUFF + +SELECT `__time` +FROM druid_table_test_ts ORDER BY `__time` ASC LIMIT 10; + +SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` < '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10; + +SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10; + +SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' + AND `__time` < '2011-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10; + +SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10;; + +SELECT `__time` +FROM druid_table_test_ts +WHERE (`__time` 
BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00') + OR (`__time` BETWEEN '1968-02-01 00:00:00' AND '1970-04-01 00:00:00') ORDER BY `__time` ASC LIMIT 10; diff --git a/ql/src/test/results/clientpositive/druid/druid_basic2.q.out b/ql/src/test/results/clientpositive/druid/druid_basic2.q.out index 88916b9d6c..13abba0763 100644 --- a/ql/src/test/results/clientpositive/druid/druid_basic2.q.out +++ b/ql/src/test/results/clientpositive/druid/druid_basic2.q.out @@ -730,7 +730,7 @@ STAGE PLANS: properties: druid.fieldNames robot,floor_day,$f3,$f4,(tok_function tok_int (tok_table_or_col robot)) druid.fieldTypes string,timestamp with local time zone,float,double,int - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"(tok_function tok_int (tok_table_or_col robot))","direction":"ascending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"(tok_function tok_int (tok_table_or_col robot))","expression":"CAST(\"robot\", 'LONG')"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"(tok_function tok_int (tok_table_or_col robot))","direction":"ascending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"(tok_function tok_int (tok_table_or_col robot))","expression":"CAST(\"robot\", 'LONG')"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy GatherStats: false Select Operator @@ -795,7 +795,7 @@ STAGE PLANS: properties: druid.fieldNames robot,floor_day druid.fieldTypes string,timestamp with local time zone - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} druid.query.type groupBy Select Operator expressions: robot (type: string), floor_day (type: timestamp with local time zone) @@ -831,7 +831,7 @@ STAGE PLANS: properties: druid.fieldNames extract,robot druid.fieldTypes timestamp with local time zone,string - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} druid.query.type groupBy Select Operator expressions: robot (type: string), extract (type: timestamp with local time zone) @@ -867,7 +867,7 @@ STAGE PLANS: properties: druid.fieldNames robot,floor_day druid.fieldTypes string,timestamp with local time zone - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} druid.query.type groupBy Select Operator expressions: robot (type: string), floor_day (type: timestamp with local time zone) diff --git a/ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out b/ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out index fa9583a8e7..003b4d60ee 100644 --- a/ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out +++ b/ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out @@ -24,6 +24,28 @@ values(cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local ti POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tstz1_n0 +PREHOOK: query: create table tstz1_n1(`__time` timestamp, n string, v integer) +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tstz1_n1 +POSTHOOK: query: create table tstz1_n1(`__time` timestamp, n string, v integer) +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tstz1_n1 +PREHOOK: query: insert into table tstz1_n1 +values(cast('2016-01-03 12:26:34' as timestamp), 'Bill', 10) +PREHOOK: type: 
QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tstz1_n1 +POSTHOOK: query: insert into table tstz1_n1 +values(cast('2016-01-03 12:26:34' as timestamp), 'Bill', 10) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tstz1_n1 PREHOOK: query: EXPLAIN select `__time` from tstz1_n0 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select `__time` from tstz1_n0 @@ -74,7 +96,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes timestamp - druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} druid.query.type scan Select Operator expressions: vc (type: timestamp) @@ -107,7 +129,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes timestamp - druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["2016-01-03T20:26:34.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.json 
{"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["2016-01-03T20:26:34.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} druid.query.type scan Select Operator expressions: vc (type: timestamp) @@ -140,7 +162,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes int - druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'HOUR','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'HOUR','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} druid.query.type scan Select Operator expressions: vc (type: int) @@ -173,7 +195,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes timestamp - druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(\"__time\",'PT1H','','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.json 
{"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'PT1H','','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} druid.query.type scan Select Operator expressions: vc (type: timestamp) @@ -189,6 +211,171 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 12:00:00 +PREHOOK: query: EXPLAIN SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n0 + properties: + druid.fieldNames extract,$f1 + druid.fieldTypes timestamp with local time zone,int + druid.query.json {"queryType":"groupBy","dataSource":"default.tstz1_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"v"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: extract (type: timestamp with local time zone), $f1 (type: int) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time` +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n0 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n0 
+POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 12:26:34.0 US/Pacific 10 +PREHOOK: query: EXPLAIN select `__time` from tstz1_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select `__time` from tstz1_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select `__time` from tstz1_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select `__time` from tstz1_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 12:26:34 +PREHOOK: query: EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n1 + properties: + druid.fieldNames vc + druid.fieldTypes int + druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'HOUR','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + 
expressions: vc (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +12 +PREHOOK: query: EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(\"__time\",'PT1H','','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 12:00:00 +PREHOOK: query: EXPLAIN SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 
+ Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n1 + properties: + druid.fieldNames extract,$f1 + druid.fieldTypes timestamp,int + druid.query.json {"queryType":"groupBy","dataSource":"default.tstz1_n1","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"v"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: extract (type: timestamp), $f1 (type: int) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time` +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 12:26:34 10 PREHOOK: query: EXPLAIN select `__time` from tstz1_n0 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select `__time` from tstz1_n0 @@ -338,7 +525,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes timestamp - druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["2016-01-03T20:26:34.000Z/2016-01-03T20:26:34.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n0","intervals":["2016-01-03T20:26:34.000Z/2016-01-03T20:26:34.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"1451852794000","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} druid.query.type scan Select Operator expressions: vc (type: timestamp) @@ -420,3 +607,168 @@ 
POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 20:00:00 +PREHOOK: query: EXPLAIN SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n0 + properties: + druid.fieldNames extract,$f1 + druid.fieldTypes timestamp with local time zone,int + druid.query.json {"queryType":"groupBy","dataSource":"default.tstz1_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"v"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: extract (type: timestamp with local time zone), $f1 (type: int) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time` +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n0 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time`, max(v) FROM tstz1_n0 GROUP BY `__time` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n0 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 20:26:34.0 UTC 10 +PREHOOK: query: EXPLAIN select `__time` from tstz1_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select `__time` from tstz1_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json 
{"queryType":"scan","dataSource":"default.tstz1_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select `__time` from tstz1_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select `__time` from tstz1_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 12:26:34 +PREHOOK: query: EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n1 + properties: + druid.fieldNames vc + druid.fieldTypes int + druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'HOUR','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +12 +PREHOOK: query: EXPLAIN SELECT 
FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(\"__time\",'PT1H','','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 12:00:00 +PREHOOK: query: EXPLAIN SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1_n1 + properties: + druid.fieldNames extract,$f1 + druid.fieldTypes timestamp,int + druid.query.json 
{"queryType":"groupBy","dataSource":"default.tstz1_n1","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"v"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: extract (type: timestamp), $f1 (type: int) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time` +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time`, max(v) FROM tstz1_n1 GROUP BY `__time` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 12:26:34 10 diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index e322d8f91f..51470a4ad8 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -123,6 +123,7 @@ FROM druid_table_n0 WHERE SIN(cdouble) > 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### +0.0 0 1 0.0 0 0 PREHOOK: query: SELECT cstring1 || '_'|| cstring2, substring(cstring2, 2, 3) as concat , upper(cstring2), lower(cstring1), SUM(cdouble) as s FROM druid_table_n0 WHERE cstring1 IS NOT NULL AND cstring2 IS NOT NULL AND cstring2 like 'Y%' GROUP BY cstring1 || '_'|| cstring2, substring(cstring2, 2, 3), upper(cstring2), lower(cstring1) ORDER BY concat DESC LIMIT 10 PREHOOK: type: QUERY @@ -186,7 +187,7 @@ STAGE PLANS: properties: druid.fieldNames $f0,_o__c1,_o__c2,_o__c3,$f4,$f5 druid.fieldTypes double,int,bigint,double,bigint,bigint - 
druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table_n0","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"and","fields":[{"type":"expression","expression":"(ceil(\"cfloat\") > 0)"},{"type":"expression","expression":"((floor(\"cdouble\") * 2) < 1000)"}]},{"type":"and","fields":[{"type":"expression","expression":"((log(\"cdouble\") / 1.0) > 0)"},{"type":"expression","expression":"(cos(\"cint\") > 0)"}]},{"type":"expression","expression":"(sin(\"cdouble\") > 1)"}]},"aggregations":[{"type":"doubleSum","name":"$f0","expression":"(\"cfloat\" + CAST(1, 'DOUBLE'))"},{"type":"doubleSum","name":"$f1","expression":"(\"cdouble\" + CAST(\"ctinyint\", 'DOUBLE'))"},{"type":"longSum","name":"$f2","fieldName":"ctinyint"},{"type":"longSum","name":"$f3","fieldName":"csmallint"},{"type":"longSum","name":"$f4","fieldName":"cint"},{"type":"longSum","name":"$f5","fieldName":"cbigint"}],"postAggregations":[{"type":"expression","name":"_o__c1","expression":"CAST(\"$f1\", 'LONG')"},{"type":"expression","name":"_o__c2","expression":"(\"$f2\" + 1)"},{"type":"expression","name":"_o__c3","expression":"CAST((\"$f3\" + \"$f4\"), 'DOUBLE')"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table_n0","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"and","fields":[{"type":"expression","expression":"(ceil(\"cfloat\") > 0)"},{"type":"expression","expression":"((floor(\"cdouble\") * 2) < 1000)"}]},{"type":"and","fields":[{"type":"expression","expression":"((log(\"cdouble\") / 1.0) > 0)"},{"type":"expression","expression":"(cos(\"cint\") > 0)"}]},{"type":"expression","expression":"(sin(\"cdouble\") > 1)"}]},"aggregations":[{"type":"doubleSum","name":"$f0","expression":"(\"cfloat\" + CAST(1, 'DOUBLE'))"},{"type":"doubleSum","name":"$f1","expression":"(\"cdouble\" + CAST(\"ctinyint\", 
'DOUBLE'))"},{"type":"longSum","name":"$f2","fieldName":"ctinyint"},{"type":"longSum","name":"$f3","fieldName":"csmallint"},{"type":"longSum","name":"$f4","fieldName":"cint"},{"type":"longSum","name":"$f5","fieldName":"cbigint"}],"postAggregations":[{"type":"expression","name":"_o__c1","expression":"CAST(\"$f1\", 'LONG')"},{"type":"expression","name":"_o__c2","expression":"(\"$f2\" + 1)"},{"type":"expression","name":"_o__c3","expression":"CAST((\"$f3\" + \"$f4\"), 'DOUBLE')"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Select Operator expressions: $f0 (type: double), _o__c1 (type: int), _o__c2 (type: bigint), _o__c3 (type: double), $f4 (type: bigint), $f5 (type: bigint) @@ -417,7 +418,7 @@ STAGE PLANS: properties: druid.fieldNames extract,cstring1,cstring2,$f3 druid.fieldTypes timestamp with local time zone,string,string,double - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -485,7 +486,7 @@ STAGE PLANS: properties: druid.fieldNames extract,cstring1,cdouble,$f3 druid.fieldTypes timestamp with local time zone,string,double,double - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cdouble","outputName":"cdouble","outputType":"DOUBLE"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cdouble","outputName":"cdouble","outputType":"DOUBLE"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 9173 Data size: 2091840 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -553,7 +554,7 @@ STAGE PLANS: properties: druid.fieldNames extract,cstring1,cstring2,$f3 druid.fieldTypes timestamp with local time zone,string,string,double - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","expression":"(CAST(2, 'DOUBLE') * \"cdouble\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","expression":"(CAST(2, 'DOUBLE') * \"cdouble\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -621,7 +622,7 @@ STAGE PLANS: properties: druid.fieldNames extract,cstring1,vc,$f3 druid.fieldTypes timestamp with local time zone,string,string,double - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"concat(concat(\"cstring2\",'_'),\"cstring1\")","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"concat(concat(\"cstring2\",'_'),\"cstring1\")","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -908,7 +909,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes bigint - druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"unix_timestamp(timestamp_format((1396681200 * '1000'),'yyyy-MM-dd HH:mm:ss','US/Pacific'),'yyyy-MM-dd HH:mm:ss')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1} + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"unix_timestamp(timestamp_format((1396681200 * '1000'),'yyyy-MM-dd HH:mm:ss','UTC'),'yyyy-MM-dd HH:mm:ss')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1} druid.query.type scan Select Operator expressions: vc (type: bigint) @@ -923,7 +924,7 @@ POSTHOOK: query: select unix_timestamp(from_unixtime(1396681200)) from druid_tab POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_n0 POSTHOOK: Output: 
hdfs://### HDFS PATH ### -1396656000 +1396681200 PREHOOK: query: explain select unix_timestamp(`__time`) from druid_table_n0 limit 1 PREHOOK: type: QUERY POSTHOOK: query: explain select unix_timestamp(`__time`) from druid_table_n0 limit 1 @@ -978,7 +979,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes string - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format((div(\"__time\",1000) * '1000'),'yyyy-MM-dd HH:mm:ss','US/Pacific')","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format((div(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),1000) * '1000'),'yyyy-MM-dd HH:mm:ss','UTC')","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator expressions: vc (type: string) @@ -1016,7 +1017,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes string - druid.query.json 
{"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(\"__time\",'P1Y','','US/Pacific'),'yyyy-MM-dd','US/Pacific')","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1Y','','UTC'),'yyyy-MM-dd','UTC')","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator expressions: vc (type: string) @@ -1067,7 +1068,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes string - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(\"__time\",'P1M','','US/Pacific'),'yyyy-MM-dd','US/Pacific')","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1M','','UTC'),'yyyy-MM-dd','UTC')","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator expressions: vc (type: string) @@ -1118,7 +1119,7 @@ STAGE PLANS: properties: druid.fieldNames vc druid.fieldTypes string - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(\"__time\",'P3M','','US/Pacific'),'yyyy-MM-dd','US/Pacific')","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P3M','','UTC'),'yyyy-MM-dd','UTC')","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator expressions: vc (type: string) @@ -1199,7 +1200,7 @@ STAGE PLANS: properties: druid.fieldNames vc,$f1 druid.fieldTypes date,double - 
druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(timestamp_format(timestamp_floor(\"__time\",'P1M','','US/Pacific'),'yyyy-MM-dd','US/Pacific'),'','US/Pacific'),'P1D','','US/Pacific')","outputType":"LONG"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","expression":"(\"cdouble\" * \"cdouble\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1M','','UTC'),'yyyy-MM-dd','UTC'),'','UTC'),'P1D','','UTC')","outputType":"LONG"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","expression":"(\"cdouble\" * \"cdouble\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator expressions: $f1 (type: double), vc (type: date) @@ -1244,7 +1245,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0 druid.fieldTypes date,date - druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_shift(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'P1D',CAST((\"cdouble\" / CAST(1000, 'DOUBLE')), 
'LONG'),'US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_shift(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'P1D',-( CAST((\"cdouble\" / CAST(1000, 'DOUBLE')), 'LONG') ),'US/Pacific')","outputType":"LONG"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_shift(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'P1D',CAST((\"cdouble\" / CAST(1000, 'DOUBLE')), 'LONG'),'UTC')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_shift(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'P1D',-( CAST((\"cdouble\" / CAST(1000, 'DOUBLE')), 'LONG') ),'UTC')","outputType":"LONG"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} druid.query.type scan Statistics: Num rows: 9173 Data size: 976192 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1291,17 +1292,44 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-03-19 1970-10-14 1969-11-13 1970-02-17 PREHOOK: query: -- Boolean Values - SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2 + + + EXPLAIN SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2 +PREHOOK: type: QUERY +POSTHOOK: query: -- Boolean Values + + + EXPLAIN SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_n0 + properties: + druid.fieldNames cboolean2,$f1 + druid.fieldTypes boolean,bigint + druid.query.json 
{"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"cboolean2","outputName":"cboolean2","outputType":"LONG"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"count","name":"$f1"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: cboolean2 (type: boolean), $f1 (type: bigint) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_n0 PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: -- Boolean Values - SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2 +POSTHOOK: query: SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### -NULL 8 -false 3140 +false 3148 true 2957 PREHOOK: query: -- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319 -- It should get fixed once we upgrade calcite @@ -1316,7 +1344,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### false 2653 -false 3452 +true 3452 PREHOOK: query: EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2 @@ -1334,7 +1362,7 @@ STAGE PLANS: properties: druid.fieldNames vc,$f1 druid.fieldTypes boolean,bigint - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"(\"ctinyint\" > 
2)","outputType":"FLOAT"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"count","name":"$f1"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"(\"ctinyint\" > 2)","outputType":"LONG"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"count","name":"$f1"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator expressions: vc (type: boolean), $f1 (type: bigint) diff --git a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out index 116c3266f4..a7fb6c5a17 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out @@ -510,7 +510,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0,vc1 druid.fieldTypes int,bigint,string - druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 8, 2), 'DOUBLE') == 31)"},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"div(timestamp_extract(\"__time\",'DAY','US/Pacific'),7)","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 8, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} + 
druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 8, 2), 'DOUBLE') == 31)"},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"div(timestamp_extract(\"__time\",'DAY','US/Pacific'),7)","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 8, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} druid.query.type scan Select Operator expressions: vc (type: int), vc0 (type: bigint), vc1 (type: string) @@ -584,7 +584,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0,vc1 druid.fieldTypes double,int,string - druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M","timeZone":"US/Pacific","locale":"en-US"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / CAST(4, 'DOUBLE')) + CAST(1, 
'DOUBLE'))","outputType":"DOUBLE"},{"type":"expression","name":"vc0","expression":"timestamp_extract(\"__time\",'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 5, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M","timeZone":"US/Pacific","locale":"en-US"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / CAST(4, 'DOUBLE')) + CAST(1, 'DOUBLE'))","outputType":"DOUBLE"},{"type":"expression","name":"vc0","expression":"timestamp_extract(\"__time\",'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 5, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} druid.query.type scan Select Operator expressions: vc (type: double), vc0 (type: int), vc1 (type: string) @@ -658,7 +658,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0 druid.fieldTypes int,string - druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"yyyy","timeZone":"US/Pacific","locale":"en-US"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pacific'), 'STRING') == '1969')"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 0, 4)","outputType":"STRING"}],"columns":["vc","vc0"],"resultFormat":"compactedList","limit":1} + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"yyyy","timeZone":"US/Pacific","locale":"en-US"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pacific'), 'STRING') == '1969')"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 0, 4)","outputType":"STRING"}],"columns":["vc","vc0"],"resultFormat":"compactedList","limit":1} druid.query.type scan Select Operator expressions: vc (type: int), vc0 (type: string) @@ -727,7 +727,7 @@ STAGE PLANS: properties: druid.fieldNames vc,$f1 druid.fieldTypes date,double - 
druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(\"__time\",'P1D','','US/Pacific')","outputType":"LONG"}],"limitSpec":{"type":"default","limit":5,"columns":[{"dimension":"vc","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cfloat"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC')","outputType":"LONG"}],"limitSpec":{"type":"default","limit":5,"columns":[{"dimension":"vc","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cfloat"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator expressions: vc (type: date), $f1 (type: double) @@ -823,7 +823,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0,vc1,vc2,vc3,vc4,vc5,vc6,vc7 druid.fieldTypes int,int,int,int,int,int,int,int,int - druid.query.json 
{"queryType":"scan","dataSource":"default.druid_test_extract_from_string_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc2","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc3","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc4","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc5","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'HOUR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc6","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'MINUTE','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc7","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'SECOND','US/Pacific')","outputType":"LONG"}],"columns":["vc","vc0","vc1","vc2","vc3","vc4","vc5","vc6","vc7"],"resultFormat":"compactedList"} + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_test_extract_from_string_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','UTC'),'P1D','','UTC'),'YEAR','UTC')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','UTC'),'P1D','','UTC'),'MONTH','UTC')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','UTC'),'P1D','','UTC'),'DAY','UTC')","outputType":"LONG"},{"type":"expression","name":"vc2","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','UTC'),'P1D','','UTC'),'YEAR','UTC')","outputType":"LONG"},{"type":"expression","name":"vc3","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','UTC'),'P1D','','UTC'),'MONTH','UTC')","outputType":"LONG"},{"type":"expression","name":"vc4","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','UTC'),'P1D','','UTC'),'DAY','UTC')","outputType":"LONG"},{"type":"expression","name":"vc5","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','UTC'),'HOUR','UTC')","outputType":"LONG"},{"type":"expression","name":"vc6","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','UTC'),'MINUTE','UTC')","outputType":"LONG"},{"type":"expression","name":"vc7","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','UTC'),'SECOND','UTC')","outputType":"LONG"}],"columns":["vc","vc0","vc1","vc2","vc3","vc4","vc5","vc6","vc7"],"resultFormat":"compactedList"} druid.query.type scan Select Operator expressions: vc (type: int), vc0 (type: int), vc1 (type: int), vc2 (type: int), vc3 (type: int), vc4 (type: int), vc5 (type: int), vc6 (type: int), vc7 (type: int) diff --git 
a/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out b/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out index 1c9e9c6718..d5394109b9 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out @@ -511,7 +511,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0,vc1 druid.fieldTypes int,bigint,string - druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 8, 2), 'DOUBLE') == 31)"},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"div(timestamp_extract(\"__time\",'DAY','US/Pacific'),7)","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 8, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 8, 2), 'DOUBLE') == 
31)"},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"div(timestamp_extract(\"__time\",'DAY','US/Pacific'),7)","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 8, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} druid.query.type scan Select Operator expressions: vc (type: int), vc0 (type: bigint), vc1 (type: string) @@ -585,7 +585,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0,vc1 druid.fieldTypes double,int,string - druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M","timeZone":"US/Pacific","locale":"en-US"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / CAST(4, 'DOUBLE')) + CAST(1, 'DOUBLE'))","outputType":"DOUBLE"},{"type":"expression","name":"vc0","expression":"timestamp_extract(\"__time\",'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 5, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M","timeZone":"US/Pacific","locale":"en-US"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / CAST(4, 'DOUBLE')) + CAST(1, 'DOUBLE'))","outputType":"DOUBLE"},{"type":"expression","name":"vc0","expression":"timestamp_extract(\"__time\",'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 5, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} druid.query.type scan Select Operator expressions: vc (type: double), vc0 (type: int), vc1 (type: string) @@ -659,7 +659,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0 druid.fieldTypes int,string - druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"yyyy","timeZone":"US/Pacific","locale":"en-US"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pacific'), 'STRING') == 
'1969')"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 0, 4)","outputType":"STRING"}],"columns":["vc","vc0"],"resultFormat":"compactedList","limit":1} + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"yyyy","timeZone":"US/Pacific","locale":"en-US"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pacific'), 'STRING') == '1969')"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 0, 4)","outputType":"STRING"}],"columns":["vc","vc0"],"resultFormat":"compactedList","limit":1} druid.query.type scan Select Operator expressions: vc (type: int), vc0 (type: string) diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out index 7f6c6b0aa3..c61ced647b 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out @@ -814,7 +814,7 @@ STAGE PLANS: properties: druid.fieldNames vc,vc0 druid.fieldTypes boolean,boolean - druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table_n3","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"isnull(\"cstring1\")","outputType":"FLOAT"},{"type":"expression","name":"vc0","expression":"notnull(\"cint\")","outputType":"FLOAT"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n3","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"isnull(\"cstring1\")","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"notnull(\"cint\")","outputType":"LONG"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} druid.query.type scan Select Operator expressions: vc (type: boolean), vc0 (type: boolean) diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out new file mode 100644 index 0000000000..879e2a7f7b --- /dev/null +++ b/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out @@ -0,0 +1,263 @@ +PREHOOK: query: CREATE TABLE druid_table_test_ts +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT `ctimestamp1` as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table_test_ts +POSTHOOK: query: CREATE TABLE druid_table_test_ts +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT `ctimestamp1` as `__time`, + cstring1, + cstring2, + 
cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_table_test_ts +POSTHOOK: Lineage: druid_table_test_ts.__time SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] +POSTHOOK: Lineage: druid_table_test_ts.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +PREHOOK: query: SELECT count(*) FROM druid_table_test_ts +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: 
query: SELECT count(*) FROM druid_table_test_ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +6105 +PREHOOK: query: SELECT floor_year(`__time`), SUM(cfloat), SUM(cdouble), SUM(ctinyint), SUM(csmallint),SUM(cint), SUM(cbigint) +FROM druid_table_test_ts GROUP BY floor_year(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT floor_year(`__time`), SUM(cfloat), SUM(cdouble), SUM(ctinyint), SUM(csmallint),SUM(cint), SUM(cbigint) +FROM druid_table_test_ts GROUP BY floor_year(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-01-01 00:00:00 -39590.24724686146 2.7308662809692383E7 -39967 7781089 1408069801800 10992545287 +PREHOOK: query: SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) +FROM druid_table_test_ts GROUP BY floor_year(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) +FROM druid_table_test_ts GROUP BY floor_year(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-01-01 00:00:00 -1790.7781 -308691.84375 2 14255 -1073279343 -8577981133 +PREHOOK: query: SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) +FROM druid_table_test_ts GROUP BY floor_year(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) +FROM druid_table_test_ts GROUP BY 
floor_year(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-01-01 00:00:00 769.16394 1.9565518E7 -45 -8101 1276572707 4923772860 +PREHOOK: query: SELECT cstring1, SUM(cdouble) as s FROM druid_table_test_ts GROUP BY cstring1 ORDER BY s ASC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT cstring1, SUM(cdouble) as s FROM druid_table_test_ts GROUP BY cstring1 ORDER BY s ASC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1cGVWH7n1QU -596096.6875 +821UdmGbkEf4j -14161.827026367188 +00iT08 0.0 +02v8WnLuYDos3Cq 0.0 +yv1js 0.0 +02VRbSC5I 0.0 +014ILGhXxNY7g02hl0Xw 0.0 +02vDyIVT752 0.0 +00PafC7v 0.0 +ytpx1RL8F2I 0.0 +PREHOOK: query: SELECT cstring2, MAX(cdouble) FROM druid_table_test_ts GROUP BY cstring2 ORDER BY cstring2 ASC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT cstring2, MAX(cdouble) FROM druid_table_test_ts GROUP BY cstring2 ORDER BY cstring2 ASC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL 1.9565518E7 +0034fkcXMQI3 15601.0 +004J8y 0.0 +00GNm -200.0 +00GW4dnb6Wgj52 -200.0 +00PBhB1Iefgk 0.0 +00d5kr1wEB7evExG 15601.0 +00qccwt8n 0.0 +017fFeQ3Gcsa83Xj2Vo0 0.0 +01EfkvNk6mjG44uxs 0.0 +PREHOOK: query: SELECT `__time` +FROM druid_table_test_ts ORDER BY `__time` ASC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time` +FROM druid_table_test_ts ORDER BY `__time` ASC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 
+1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +PREHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` < '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` < '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +PREHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +PREHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' + AND `__time` < '2011-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE 
`__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' + AND `__time` < '2011-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +PREHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE `__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +PREHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE (`__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00') + OR (`__time` BETWEEN '1968-02-01 00:00:00' AND '1970-04-01 00:00:00') ORDER BY `__time` ASC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_test_ts +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT `__time` +FROM druid_table_test_ts +WHERE (`__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00') + OR (`__time` BETWEEN '1968-02-01 00:00:00' AND '1970-04-01 00:00:00') ORDER BY `__time` ASC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_test_ts +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 
15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 diff --git a/ql/src/test/results/clientpositive/druid_intervals.q.out b/ql/src/test/results/clientpositive/druid_intervals.q.out index fde446cecb..a5203c3182 100644 --- a/ql/src/test/results/clientpositive/druid_intervals.q.out +++ b/ql/src/test/results/clientpositive/druid_intervals.q.out @@ -375,7 +375,7 @@ STAGE PLANS: properties: druid.fieldNames vc,robot druid.fieldTypes timestamp with local time zone,string - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"in","dimension":"__time","values":["2010-01-01T00:00:00.000Z","2011-01-01T00:00:00.000Z"],"extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"selector","dimension":"robot","value":"user1"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","robot"],"resultFormat":"compactedList"} + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"in","dimension":"__time","values":["2010-01-01T08:00:00.000Z","2011-01-01T08:00:00.000Z"],"extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"selector","dimension":"robot","value":"user1"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","robot"],"resultFormat":"compactedList"} druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/druid_timeseries.q.out b/ql/src/test/results/clientpositive/druid_timeseries.q.out index 455bdd5421..8b29145ac3 100644 --- 
a/ql/src/test/results/clientpositive/druid_timeseries.q.out +++ b/ql/src/test/results/clientpositive/druid_timeseries.q.out @@ -27,7 +27,7 @@ STAGE PLANS: properties: druid.fieldNames $f0 druid.fieldTypes bigint - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"2009-12-31T16:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"bound","dimension":"__time","upper":"2012-02-29T16:00:00.000Z","upperStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}}]},{"type":"bound","dimension":"added","upper":"0.0","upperStrict":false,"ordering":"numeric"}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"2010-01-01T00:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"bound","dimension":"__time","upper":"2012-03-01T00:00:00.000Z","upperStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},{"type":"bound","dimension":"added","upper":"0.0","upperStrict":false,"ordering":"numeric"}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE Select Operator @@ -83,7 +83,7 @@ STAGE PLANS: properties: druid.fieldNames $f0,$f1 druid.fieldTypes float,double - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"doubleMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"doubleMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -115,7 +115,7 @@ STAGE PLANS: properties: druid.fieldNames extract,$f1,$f2 druid.fieldTypes timestamp with local time zone,float,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -167,57 +167,26 @@ FROM druid_table_1_n3 GROUP BY floor_quarter(`__time`) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1_n3 - properties: - druid.fieldNames vc,added,variation - druid.fieldTypes timestamp with local time zone,float,float - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","added","variation"],"resultFormat":"compactedList"} - druid.query.type scan - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: floor_quarter(vc) (type: timestamp with local time zone), added (type: float), variation (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: max(_col1), sum(_col2) - keys: _col0 (type: timestamp with local time zone) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time 
zone) - sort order: + - Map-reduce partition columns: _col0 (type: timestamp with local time zone) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: timestamp with local time zone) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P3M","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT floor_month(`__time`), max(added), sum(variation) diff --git a/ql/src/test/results/clientpositive/druid_topn.q.out 
b/ql/src/test/results/clientpositive/druid_topn.q.out index 7bfd2ae93b..3ce327ddb4 100644 --- a/ql/src/test/results/clientpositive/druid_topn.q.out +++ b/ql/src/test/results/clientpositive/druid_topn.q.out @@ -123,7 +123,7 @@ STAGE PLANS: properties: druid.fieldNames extract,robot,$f2,$f3 druid.fieldTypes timestamp with local time zone,string,float,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -159,7 +159,7 @@ STAGE PLANS: properties: druid.fieldNames robot,floor_year,$f2,$f3 druid.fieldTypes string,timestamp with local time zone,float,double - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -195,7 +195,7 @@ STAGE PLANS: properties: druid.fieldNames robot,floor_month,$f2,$f3 druid.fieldTypes string,timestamp with local time zone,float,double - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -231,7 +231,7 @@ STAGE PLANS: properties: druid.fieldNames robot,namespace,floor_month,$f3,$f4 druid.fieldTypes string,string,timestamp with local time zone,float,double - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"namespace","outputName":"namespace","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"namespace","outputName":"namespace","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -267,7 +267,7 @@ STAGE PLANS: properties: 
druid.fieldNames robot,namespace,floor_month,$f3,$f4 druid.fieldTypes string,string,timestamp with local time zone,float,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"namespace","outputName":"namespace","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"namespace","outputName":"namespace","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} 
druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -305,7 +305,7 @@ STAGE PLANS: properties: druid.fieldNames floor_year,$f1_0,$f2 druid.fieldTypes timestamp with local time zone,float,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f2","direction":"ascending","dimensionOrder":"numeric"}]},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f2","direction":"ascending","dimensionOrder":"numeric"}]},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -347,7 +347,7 @@ STAGE PLANS: properties: druid.fieldNames robot,floor_hour,$f2,$f3 druid.fieldTypes string,timestamp with local time 
zone,float,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_hour","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f2","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["2010-01-01T08:00:00.000Z/2014-01-01T08:00:00.001Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_hour","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f2","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["2010-01-01T08:00:00.000Z/2014-01-01T08:00:00.001Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/infer_join_preds.q.out b/ql/src/test/results/clientpositive/infer_join_preds.q.out index 62b977f5c7..6d2ec57e22 100644 --- a/ql/src/test/results/clientpositive/infer_join_preds.q.out +++ b/ql/src/test/results/clientpositive/infer_join_preds.q.out @@ -1184,7 +1184,7 @@ STAGE PLANS: 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (concat(CASE WHEN (_col1 is null) THEN (1) ELSE (_col1) END, ',', CASE WHEN (_col2 is null) THEN (1) ELSE (_col2) END, ',', CASE WHEN (_col3 is null) THEN (1) ELSE (_col3) END, ',', CASE WHEN (_col4 is null) THEN (1) ELSE (_col4) END, ',', CASE WHEN (_col5 is null) THEN ('') ELSE (_col5) END, ',', CASE WHEN (_col18 is null) THEN (1) ELSE (_col18) END, ',', CASE WHEN (_col6 is null) THEN (1) ELSE (_col6) END, ',', CASE WHEN (length(_col7) is null) THEN ('') ELSE (_col7) END, ',', CASE WHEN (_col8 is null) THEN (TIMESTAMP'2017-12-08 00:00:00') ELSE (_col8) END, ',', CASE WHEN (_col9 is null) THEN (1) ELSE (_col9) END, ',', CASE WHEN (_col10 is null) THEN (1) ELSE (_col10) END, ',', CASE WHEN (_col11 is null) THEN (1) ELSE (_col11) END, ',', CASE WHEN (_col12 is null) THEN (1) ELSE (_col12) END, ',', CASE WHEN (length(_col13) is null) THEN ('') ELSE (_col13) END, ',', CASE WHEN (length(_col14) is null) THEN ('') ELSE (_col14) END, ',', CASE WHEN (_col15 is null) THEN (1) ELSE (_col15) END, ',', CASE WHEN (_col16 is null) THEN (1) ELSE (_col16) END, ',', CASE WHEN (_col17 is null) THEN (1) ELSE (_col17) END) <> concat(CASE WHEN (length(_col20) is null) THEN ('') ELSE (_col20) END, ',', CASE WHEN (_col21 is null) THEN (1) ELSE (_col21) END, ',', CASE WHEN (_col22 is null) THEN (1) ELSE (_col22) END, ',', CASE WHEN (_col23 is null) THEN (1) ELSE (_col23) END, ',', CASE WHEN (_col24 is null) THEN (1) ELSE (_col24) END, ',', CASE WHEN (_col25 is null) THEN ('') ELSE (_col25) END, ',', CASE WHEN (_col38 is null) THEN (1) ELSE (_col38) END, ',', CASE WHEN (_col26 
is null) THEN (1) ELSE (_col26) END, ',', CASE WHEN (length(_col27) is null) THEN ('') ELSE (_col27) END, ',', CASE WHEN (_col28 is null) THEN (TIMESTAMP'2017-12-08 00:00:00') ELSE (_col28) END, ',', CASE WHEN (_col29 is null) THEN (1) ELSE (_col29) END, ',', CASE WHEN (_col30 is null) THEN (1) ELSE (_col30) END, ',', CASE WHEN (_col31 is null) THEN (1) ELSE (_col31) END, ',', CASE WHEN (_col32 is null) THEN (1) ELSE (_col32) END, ',', CASE WHEN (length(_col33) is null) THEN ('') ELSE (_col33) END, ',', CASE WHEN (length(_col34) is null) THEN ('') ELSE (_col34) END, ',', CASE WHEN (_col35 is null) THEN (1) ELSE (_col35) END, ',', CASE WHEN (_col36 is null) THEN (1) ELSE (_col36) END, ',', CASE WHEN (_col37 is null) THEN (1) ELSE (_col37) END)) (type: boolean) + predicate: (concat(_col1, ',', CASE WHEN (_col2 is null) THEN (1) ELSE (_col2) END, ',', CASE WHEN (_col3 is null) THEN (1) ELSE (_col3) END, ',', CASE WHEN (_col4 is null) THEN (1) ELSE (_col4) END, ',', CASE WHEN (_col5 is null) THEN ('') ELSE (_col5) END, ',', CASE WHEN (_col18 is null) THEN (1) ELSE (_col18) END, ',', CASE WHEN (_col6 is null) THEN (1) ELSE (_col6) END, ',', CASE WHEN (length(_col7) is null) THEN ('') ELSE (_col7) END, ',', CASE WHEN (_col8 is null) THEN (TIMESTAMP'2017-12-08 00:00:00') ELSE (_col8) END, ',', CASE WHEN (_col9 is null) THEN (1) ELSE (_col9) END, ',', CASE WHEN (_col10 is null) THEN (1) ELSE (_col10) END, ',', CASE WHEN (_col11 is null) THEN (1) ELSE (_col11) END, ',', CASE WHEN (_col12 is null) THEN (1) ELSE (_col12) END, ',', CASE WHEN (length(_col13) is null) THEN ('') ELSE (_col13) END, ',', CASE WHEN (length(_col14) is null) THEN ('') ELSE (_col14) END, ',', CASE WHEN (_col15 is null) THEN (1) ELSE (_col15) END, ',', CASE WHEN (_col16 is null) THEN (1) ELSE (_col16) END, ',', CASE WHEN (_col17 is null) THEN (1) ELSE (_col17) END) <> concat(CASE WHEN (length(_col20) is null) THEN ('') ELSE (_col20) END, ',', _col21, ',', CASE WHEN (_col22 is null) THEN (1) ELSE 
(_col22) END, ',', CASE WHEN (_col23 is null) THEN (1) ELSE (_col23) END, ',', CASE WHEN (_col24 is null) THEN (1) ELSE (_col24) END, ',', CASE WHEN (_col25 is null) THEN ('') ELSE (_col25) END, ',', CASE WHEN (_col38 is null) THEN (1) ELSE (_col38) END, ',', CASE WHEN (_col26 is null) THEN (1) ELSE (_col26) END, ',', CASE WHEN (length(_col27) is null) THEN ('') ELSE (_col27) END, ',', CASE WHEN (_col28 is null) THEN (TIMESTAMP'2017-12-08 00:00:00') ELSE (_col28) END, ',', CASE WHEN (_col29 is null) THEN (1) ELSE (_col29) END, ',', CASE WHEN (_col30 is null) THEN (1) ELSE (_col30) END, ',', CASE WHEN (_col31 is null) THEN (1) ELSE (_col31) END, ',', CASE WHEN (_col32 is null) THEN (1) ELSE (_col32) END, ',', CASE WHEN (length(_col33) is null) THEN ('') ELSE (_col33) END, ',', CASE WHEN (length(_col34) is null) THEN ('') ELSE (_col34) END, ',', CASE WHEN (_col35 is null) THEN (1) ELSE (_col35) END, ',', CASE WHEN (_col36 is null) THEN (1) ELSE (_col36) END, ',', CASE WHEN (_col37 is null) THEN (1) ELSE (_col37) END)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: bigint) diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out index 46c40b4bd7..2ccde41e29 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out @@ -260,29 +260,29 @@ Stage-0 limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=1 width=185) - Conds:RS_20.100, true=RS_24._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] + File Output Operator [FS_13] + Merge Join Operator [MERGEJOIN_18] (rows=1 width=185) + Conds:RS_21.100, true=RS_25._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_20] + SHUFFLE [RS_21] 
PartitionCols:100, true - Select Operator [SEL_19] (rows=1 width=193) + Select Operator [SEL_20] (rows=1 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_18] (rows=1 width=185) + Filter Operator [FIL_19] (rows=1 width=185) predicate:false TableScan [TS_0] (rows=10 width=185) default@table1_n10,table1_n10,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"] <-Map 3 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_24] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_23] (rows=1 width=8) + Group By Operator [GBY_24] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_22] (rows=1 width=8) + Select Operator [SEL_23] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_21] (rows=1 width=4) - predicate:(id = 100) - TableScan [TS_3] (rows=5 width=4) - default@table3_n0,table3_n0,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] + Filter Operator [FIL_22] (rows=1 width=8) + predicate:false + TableScan [TS_3] (rows=5 width=3) + default@table3_n0,table3_n0,Tbl:COMPLETE,Col:COMPLETE PREHOOK: query: select table1_n10.id, table1_n10.val, table1_n10.val1 from table1_n10 left semi join table3_n0 on table1_n10.dimid = table3_n0.id and table3_n0.id = 100 where table1_n10.dimid <> 100 PREHOOK: type: QUERY @@ -358,29 +358,29 @@ Stage-0 limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=1 width=185) - Conds:RS_20.100, true=RS_24._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] + File Output Operator [FS_13] + Merge Join Operator [MERGEJOIN_18] (rows=1 width=185) + Conds:RS_21.100, true=RS_25._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_20] + SHUFFLE [RS_21] PartitionCols:100, true - Select Operator [SEL_19] (rows=1 width=193) + Select Operator [SEL_20] (rows=1 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_18] (rows=1 width=185) + Filter Operator [FIL_19] (rows=1 width=185) 
predicate:false TableScan [TS_0] (rows=10 width=185) default@table1_n10,table1_n10,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"] <-Map 3 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_24] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_23] (rows=1 width=8) + Group By Operator [GBY_24] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_22] (rows=1 width=8) + Select Operator [SEL_23] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_21] (rows=1 width=4) - predicate:(id = 100) - TableScan [TS_3] (rows=5 width=4) - default@table3_n0,table3_n0,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] + Filter Operator [FIL_22] (rows=1 width=8) + predicate:false + TableScan [TS_3] (rows=5 width=3) + default@table3_n0,table3_n0,Tbl:COMPLETE,Col:COMPLETE PREHOOK: query: select table1_n10.id, table1_n10.val, table1_n10.val1 from table1_n10 left semi join table3_n0 on table1_n10.dimid = table3_n0.id and table3_n0.id = 100 where table1_n10.dimid = 200 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index f9018b4273..708fa17617 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -477,7 +477,7 @@ Stage-0 Select Operator [SEL_21] (rows=1 width=20) Output:["_col1","_col4"] Merge Join Operator [MERGEJOIN_57] (rows=1 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or _col1 is not null)} {((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} <-Map 1 
[SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col0 @@ -500,7 +500,7 @@ Stage-0 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_37] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [SIMPLE_EDGE] llap @@ -516,7 +516,7 @@ Stage-0 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_38] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -571,7 +571,7 @@ Stage-0 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_34] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 
1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [SIMPLE_EDGE] llap @@ -587,7 +587,7 @@ Stage-0 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_35] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -624,7 +624,7 @@ Stage-0 Select Operator [SEL_21] (rows=1 width=20) Output:["_col1","_col4"] Merge Join Operator [MERGEJOIN_56] (rows=1 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or _col1 is not null)} {((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col0 @@ -647,7 +647,7 @@ Stage-0 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_36] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + 
predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [SIMPLE_EDGE] llap @@ -663,7 +663,7 @@ Stage-0 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_37] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -718,7 +718,7 @@ Stage-0 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_34] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [SIMPLE_EDGE] llap @@ -734,7 +734,7 @@ Stage-0 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_35] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) 
or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1587,7 +1587,7 @@ Stage-0 Group By Operator [GBY_3] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_41] (rows=1 width=93) - predicate:((((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0D)) and ((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) + predicate:((((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0D)) and ((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [SIMPLE_EDGE] llap @@ -1605,7 +1605,7 @@ Stage-0 Group By Operator [GBY_12] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_42] (rows=1 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) TableScan [TS_9] (rows=20 width=88) 
default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index fb9cfd253d..a2748a5b98 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -834,7 +834,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 Statistics: Num rows: 1 Data size: 1576 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col10 = 0L) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) + predicate: ((_col10 = 0L) or (_col13 is null and (_col11 >= _col10))) (type: boolean) Statistics: Num rows: 1 Data size: 1576 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index dc5461111b..87681d1477 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -1800,15 +1800,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 28 Data size: 17955 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col5 BETWEEN CASE WHEN (_col10 is null) THEN (null) ELSE (_col9) END AND _col12 (type: boolean) - Statistics: Num rows: 3 Data size: 1923 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col10 is null) THEN (_col5 BETWEEN null AND _col12) ELSE (_col5 BETWEEN _col9 AND _col12) END (type: 
boolean) + Statistics: Num rows: 14 Data size: 8977 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 1923 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 8977 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1923 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 8977 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2329,15 +2329,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 26 Data size: 16262 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToLong(_col5) <> CASE WHEN (_col10 is null) THEN (0) ELSE (_col9) END) (type: boolean) - Statistics: Num rows: 26 Data size: 16262 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CASE WHEN (_col10 is null) THEN ((UDFToLong(_col5) <> 0)) ELSE ((UDFToLong(_col5) <> _col9)) END (type: boolean) + Statistics: Num rows: 13 Data size: 8131 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 8047 Basic stats: 
COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4713,15 +4713,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col2) <> CASE WHEN (_col11 is null) THEN (0) ELSE (_col10) END) (type: boolean) - Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col11 is null) THEN ((UDFToLong(_col2) <> 0)) ELSE ((UDFToLong(_col2) <> _col10)) END (type: boolean) + Statistics: Num rows: 2 Data size: 1390 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1390 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1390 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4974,17 +4974,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, 
_col11 Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col2) <> CASE WHEN (_col11 is null) THEN (0) ELSE (_col10) END) (type: boolean) - Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col11 is null) THEN ((UDFToLong(_col2) <> 0)) ELSE ((UDFToLong(_col2) <> _col10)) END (type: boolean) + Statistics: Num rows: 2 Data size: 1390 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1390 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1390 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) Reducer 3 Execution mode: llap @@ -4996,9 +4996,9 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14 - Statistics: Num rows: 5 Data size: 3823 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1529 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col0) > CASE WHEN (_col14 is null) THEN (0) ELSE (_col13) END) (type: boolean) + predicate: CASE 
WHEN (_col14 is null) THEN ((UDFToLong(_col0) > 0)) ELSE ((UDFToLong(_col0) > _col13)) END (type: boolean) Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) @@ -6003,15 +6003,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 26 Data size: 16406 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToLong(_col5) <> CASE WHEN (_col10 is null) THEN (0) ELSE (_col9) END) (type: boolean) - Statistics: Num rows: 26 Data size: 16406 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CASE WHEN (_col10 is null) THEN ((UDFToLong(_col5) <> 0)) ELSE ((UDFToLong(_col5) <> _col9)) END (type: boolean) + Statistics: Num rows: 13 Data size: 8203 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out 
b/ql/src/test/results/clientpositive/llap/subquery_views.q.out index 05c91cdae9..370accb94e 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -239,7 +239,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4, _col5, _col8 Statistics: Num rows: 25 Data size: 4950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) Statistics: Num rows: 12 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 78098b4463..29ae9642b1 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -1251,7 +1251,7 @@ where (case when cint % 2 = 0 then cint else 0 end) = cint) a POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -0 +4086 PREHOOK: query: select count(*) from ( select diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index 1d203f7395..919b71234d 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -1871,7 +1871,7 @@ POSTHOOK: query: explain extended select key, value from pcr_t1 where ds>='2000- POSTHOOK: type: QUERY OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`pcr_t1` -WHERE `ds` >= '2000-04-08' OR `ds` 
< '2000-04-10' +WHERE `ds` >= '2000-04-08' OR `ds` IS NOT NULL ORDER BY `key`, `value` STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1883,7 +1883,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - filterExpr: ((ds >= '2000-04-08') or (ds < '2000-04-10')) (type: boolean) + filterExpr: ((ds >= '2000-04-08') or ds is not null) (type: boolean) Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query11.q.out b/ql/src/test/results/clientpositive/perf/spark/query11.q.out index f0e8732812..87a0cc03eb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query11.q.out @@ -157,13 +157,13 @@ STAGE PLANS: Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) Reducer 12 <- Reducer 11 (GROUP, 481) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 398), Map 19 (PARTITION-LEVEL SORT, 398) - Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 975), Reducer 16 (PARTITION-LEVEL SORT, 975) - Reducer 18 <- Reducer 17 (GROUP, 481) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) + Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) + Reducer 18 <- Reducer 17 (GROUP, 186) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 154), Map 25 (PARTITION-LEVEL SORT, 154) - Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 706), Reducer 22 (PARTITION-LEVEL SORT, 706) - Reducer 24 <- Reducer 23 (GROUP, 186) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398) + Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 975), Reducer 22 (PARTITION-LEVEL SORT, 975) + Reducer 24 <- Reducer 23 (GROUP, 481) Reducer 3 <- Map 8 
(PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) Reducer 4 <- Reducer 3 (GROUP, 186) Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 444), Reducer 18 (PARTITION-LEVEL SORT, 444), Reducer 24 (PARTITION-LEVEL SORT, 444), Reducer 4 (PARTITION-LEVEL SORT, 444) @@ -232,31 +232,31 @@ STAGE PLANS: Map 15 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 
19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 19 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -291,31 +291,31 @@ STAGE PLANS: Map 21 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2)) 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 25 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -483,12 +483,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Reducer 17 Reduce Operator 
Tree: @@ -499,22 +499,22 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), (_col3 - _col2) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col7) keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(18,2)) Reducer 18 Execution 
mode: vectorized @@ -524,17 +524,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(18,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: decimal(18,2)) + expressions: _col0 (type: string), _col7 (type: decimal(18,2)) + outputColumnNames: _col0, _col7 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col7 > 0) (type: boolean) + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: decimal(18,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(18,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -560,12 +567,12 @@ STAGE PLANS: 0 
_col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Reducer 23 Reduce Operator Tree: @@ -576,22 +583,22 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), (_col3 - _col2) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col7) keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic 
stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(18,2)) Reducer 24 Execution mode: vectorized @@ -601,24 +608,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(18,2)) - outputColumnNames: _col0, _col7 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col7 > 0) (type: boolean) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(18,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic 
stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(18,2)) + expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: decimal(18,2)) Reducer 3 Reduce Operator Tree: Join Operator @@ -676,19 +676,19 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) 3 _col0 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6, _col8 + outputColumnNames: _col1, _col3, _col5, _col7, _col8 Statistics: Num rows: 1149975359 Data size: 101451160012 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 / _col8) > (_col6 / _col3)) (type: boolean) - Statistics: Num rows: 383325119 Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col8 / _col3))) ELSE ((null > (_col8 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END (type: boolean) + Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string) + expressions: _col7 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 383325119 Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 383325119 Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 6 Execution mode: vectorized @@ -696,7 +696,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 383325119 Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out index 0993dc23c2..347261396c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out @@ -225,21 +225,21 @@ STAGE PLANS: Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) Reducer 12 <- Reducer 11 (GROUP, 481) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 398), Map 19 (PARTITION-LEVEL SORT, 398) - Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 975), Reducer 16 (PARTITION-LEVEL SORT, 975) - Reducer 18 <- Reducer 17 (GROUP, 481) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306) - Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 873), Reducer 22 (PARTITION-LEVEL SORT, 873) - Reducer 24 <- Reducer 23 (GROUP, 369) - Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 154), Map 31 (PARTITION-LEVEL SORT, 154) - Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 706), Reducer 28 (PARTITION-LEVEL SORT, 706) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 873), Reducer 2 (PARTITION-LEVEL SORT, 873) - Reducer 30 <- Reducer 29 (GROUP, 186) - Reducer 34 <- Map 33 (PARTITION-LEVEL 
SORT, 154), Map 37 (PARTITION-LEVEL SORT, 154) - Reducer 35 <- Map 38 (PARTITION-LEVEL SORT, 706), Reducer 34 (PARTITION-LEVEL SORT, 706) - Reducer 36 <- Reducer 35 (GROUP, 186) - Reducer 4 <- Reducer 3 (GROUP, 369) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 306), Map 19 (PARTITION-LEVEL SORT, 306) + Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 873), Reducer 16 (PARTITION-LEVEL SORT, 873) + Reducer 18 <- Reducer 17 (GROUP, 369) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 154), Map 25 (PARTITION-LEVEL SORT, 154) + Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 706), Reducer 22 (PARTITION-LEVEL SORT, 706) + Reducer 24 <- Reducer 23 (GROUP, 186) + Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 306), Map 31 (PARTITION-LEVEL SORT, 306) + Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 873), Reducer 28 (PARTITION-LEVEL SORT, 873) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) + Reducer 30 <- Reducer 29 (GROUP, 369) + Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 398), Map 37 (PARTITION-LEVEL SORT, 398) + Reducer 35 <- Map 38 (PARTITION-LEVEL SORT, 975), Reducer 34 (PARTITION-LEVEL SORT, 975) + Reducer 36 <- Reducer 35 (GROUP, 481) + Reducer 4 <- Reducer 3 (GROUP, 186) Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 690), Reducer 18 (PARTITION-LEVEL SORT, 690), Reducer 24 (PARTITION-LEVEL SORT, 690), Reducer 30 (PARTITION-LEVEL SORT, 690), Reducer 36 (PARTITION-LEVEL SORT, 690), Reducer 4 (PARTITION-LEVEL SORT, 690) Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### @@ -247,21 +247,21 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not 
null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_ext_discount_amt (type: decimal(7,2)), cs_ext_sales_price (type: decimal(7,2)), cs_ext_wholesale_cost (type: decimal(7,2)), cs_ext_list_price (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized Map 13 @@ -306,31 +306,31 @@ STAGE PLANS: Map 15 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 
Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_ext_discount_amt (type: decimal(7,2)), cs_ext_sales_price (type: decimal(7,2)), cs_ext_wholesale_cost (type: decimal(7,2)), cs_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized Map 19 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 
2002) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -365,21 +365,21 @@ STAGE PLANS: Map 21 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_ext_discount_amt (type: decimal(7,2)), cs_ext_sales_price (type: decimal(7,2)), cs_ext_wholesale_cost (type: decimal(7,2)), cs_ext_list_price (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num 
rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized Map 25 @@ -424,21 +424,21 @@ STAGE PLANS: Map 27 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_ext_discount_amt (type: decimal(7,2)), cs_ext_sales_price (type: decimal(7,2)), 
cs_ext_wholesale_cost (type: decimal(7,2)), cs_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized Map 31 @@ -483,31 +483,31 @@ STAGE PLANS: Map 33 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) + 
expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized Map 37 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -675,12 +675,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition 
columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 17 Reduce Operator Tree: @@ -691,22 +691,22 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col7) keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition 
columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) Reducer 18 Execution mode: vectorized @@ -716,17 +716,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: decimal(24,6)) + expressions: _col0 (type: string), _col7 (type: decimal(24,6)) + outputColumnNames: _col0, _col7 + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col7 > 0) (type: boolean) + Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: decimal(24,6)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,6)) Reducer 2 Reduce Operator Tree: Join Operator @@ -736,12 +743,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 22 Reduce Operator Tree: @@ -752,12 +759,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 23 Reduce Operator 
Tree: @@ -768,22 +775,22 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col7) keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: 
_col7 (type: decimal(24,6)) Reducer 24 Execution mode: vectorized @@ -793,23 +800,23 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) outputColumnNames: _col0, _col7 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col7 > 0) (type: boolean) - Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,6)) Reducer 28 Reduce Operator Tree: @@ -820,12 +827,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 158402938 Data 
size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 29 Reduce Operator Tree: @@ -836,22 +843,22 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col7) keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE 
Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) Reducer 3 Reduce Operator Tree: @@ -862,22 +869,22 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col7) keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) Reducer 30 Execution mode: vectorized @@ -887,16 +894,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,6)) Reducer 34 Reduce Operator Tree: @@ -907,12 +914,12 @@ STAGE 
PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 35 Reduce Operator Tree: @@ -923,22 +930,22 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), ((((_col5 - _col4) - _col2) + _col3) / 2) (type: decimal(14,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col7) keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data 
size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) Reducer 36 Execution mode: vectorized @@ -948,24 +955,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col7 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col7 > 0) (type: boolean) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort 
order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(24,6)) + expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(24,6)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: decimal(24,6)) Reducer 4 Execution mode: vectorized Reduce Operator Tree: @@ -974,16 +974,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,6)) Reducer 5 Reduce 
Operator Tree: @@ -1001,19 +1001,19 @@ STAGE PLANS: 3 _col0 (type: string) 4 _col0 (type: string) 5 _col0 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col10, _col12 + outputColumnNames: _col1, _col3, _col5, _col7, _col9, _col11, _col12 Statistics: Num rows: 1916625598 Data size: 169085266687 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col1 / _col8) > (_col10 / _col12)) and ((_col1 / _col8) > (_col6 / _col3))) (type: boolean) - Statistics: Num rows: 212958399 Data size: 18787251785 Basic stats: COMPLETE Column stats: NONE + predicate: (CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col12 / _col3))) ELSE ((null > (_col12 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END and CASE WHEN (_col7 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col1 / _col7))) ELSE ((null > (_col1 / _col7))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END) (type: boolean) + Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string) + expressions: _col11 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 212958399 Data size: 18787251785 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 212958399 Data size: 18787251785 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 6 Execution mode: vectorized @@ -1021,7 +1021,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 212958399 Data 
size: 18787251785 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query74.q.out b/ql/src/test/results/clientpositive/perf/spark/query74.q.out index c9083ac4f0..3678906bc0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query74.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query74.q.out @@ -129,13 +129,13 @@ STAGE PLANS: Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) Reducer 12 <- Reducer 11 (GROUP, 481) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 398), Map 19 (PARTITION-LEVEL SORT, 398) - Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 975), Reducer 16 (PARTITION-LEVEL SORT, 975) - Reducer 18 <- Reducer 17 (GROUP, 481) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) + Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) + Reducer 18 <- Reducer 17 (GROUP, 186) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 154), Map 25 (PARTITION-LEVEL SORT, 154) - Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 706), Reducer 22 (PARTITION-LEVEL SORT, 706) - Reducer 24 <- Reducer 23 (GROUP, 186) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398) + Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 975), Reducer 22 (PARTITION-LEVEL SORT, 975) + Reducer 24 <- Reducer 23 (GROUP, 481) Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) Reducer 4 <- Reducer 3 (GROUP, 186) Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 444), Reducer 18 (PARTITION-LEVEL SORT, 444), Reducer 24 
(PARTITION-LEVEL SORT, 444), Reducer 4 (PARTITION-LEVEL SORT, 444) @@ -205,34 +205,34 @@ STAGE PLANS: Map 15 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_net_paid (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_net_paid (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized Map 19 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year) IN (2001, 2002) and (d_year = 2002) and d_date_sk is not null) (type: boolean) + 
filterExpr: ((d_year) IN (2001, 2002) and (d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), 2002 (type: int) + expressions: d_date_sk (type: int), 2001 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -265,21 +265,21 @@ STAGE PLANS: Map 21 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_net_paid (type: decimal(7,2)) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_net_paid (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized Map 25 @@ -326,13 +326,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year) IN (2001, 2002) and (d_year = 2001) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2001, 2002) and (d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), 2001 (type: int) + expressions: d_date_sk (type: int), 2002 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -455,12 +455,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) Reducer 17 Reduce Operator Tree: @@ -471,18 +471,18 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col4, _col6, _col7, _col8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col2) keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col4 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(7,2)) Reducer 18 Execution mode: vectorized @@ -492,17 +492,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: 
decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) + expressions: _col0 (type: string), _col4 (type: decimal(7,2)) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col4 > 0) (type: boolean) + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col4 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -528,12 +535,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE 
value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) Reducer 23 Reduce Operator Tree: @@ -544,18 +551,18 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col4, _col6, _col7, _col8 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col2) keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col4 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(7,2)) Reducer 24 Execution mode: vectorized @@ -565,17 +572,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col4 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: 
string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)) + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator @@ -609,21 +616,14 @@ STAGE PLANS: Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col4 (type: decimal(7,2)) - outputColumnNames: _col0, _col4 + outputColumnNames: _col0, _col1 Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col4 > 0) (type: boolean) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col4 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: decimal(7,2)) Reducer 5 Reduce Operator Tree: Join Operator @@ -636,19 +636,19 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) 3 _col0 (type: string) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9 + outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1149975359 Data size: 101451160012 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col9 / _col1) > (_col7 / _col3)) (type: boolean) - Statistics: Num rows: 383325119 Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE ((null > (_col9 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END (type: boolean) + Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col5 (type: string), _col6 (type: string) + expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 383325119 Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 383325119 Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 6 Execution mode: vectorized @@ -656,7 +656,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 383325119 
Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/query11.q.out index bf454a0e75..4dfd2933ba 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query11.q.out @@ -149,30 +149,30 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 17 (BROADCAST_EDGE) -Map 11 <- Reducer 16 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE) -Map 26 <- Reducer 22 (BROADCAST_EDGE) -Map 7 <- Reducer 18 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE) +Map 1 <- Reducer 21 (BROADCAST_EDGE) +Map 11 <- Reducer 23 (BROADCAST_EDGE) +Map 15 <- Reducer 20 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE) +Map 7 <- Reducer 22 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 13 <- Map 23 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 15 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 23 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) 
+Reducer 17 <- Map 24 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 24 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 15 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Map 23 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 19 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Map 24 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator @@ -182,16 +182,16 @@ Stage-0 File Output Operator [FS_360] Limit [LIM_359] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_358] (rows=383325119 width=88) + Select Operator [SEL_358] (rows=574987679 width=88) Output:["_col0"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_93] - Select Operator [SEL_92] (rows=383325119 width=88) + Select Operator [SEL_92] (rows=574987679 width=88) Output:["_col0"] - Filter Operator [FIL_91] (rows=383325119 width=88) - predicate:((_col1 / _col8) > (_col6 / _col3)) + Filter Operator [FIL_91] (rows=574987679 width=88) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col8 / _col3))) ELSE ((null > (_col8 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END Merge Join Operator [MERGEJOIN_283] (rows=1149975359 width=88) - 
Conds:RS_325._col0=RS_337._col0(Inner),RS_337._col0=RS_347._col0(Inner),RS_337._col0=RS_357._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8"] + Conds:RS_325._col0=RS_337._col0(Inner),RS_337._col0=RS_347._col0(Inner),RS_337._col0=RS_357._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8"] <-Reducer 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_337] PartitionCols:_col0 @@ -212,28 +212,28 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_278] (rows=696954748 width=88) Conds:RS_33._col1=RS_316._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 23 [SIMPLE_EDGE] vectorized + <-Map 24 [SIMPLE_EDGE] vectorized SHUFFLE [RS_316] PartitionCols:_col0 Select Operator [SEL_312] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Filter Operator [FIL_311] (rows=80000000 width=860) predicate:(c_customer_id is not null and c_customer_sk is not null) - TableScan [TS_49] (rows=80000000 width=860) + TableScan [TS_71] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_277] (rows=633595212 width=88) Conds:RS_332._col0=RS_294._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] vectorized + <-Map 19 [SIMPLE_EDGE] vectorized SHUFFLE [RS_294] PartitionCols:_col0 Select Operator [SEL_288] (rows=36524 width=1119) Output:["_col0"] Filter Operator [FIL_285] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_46] (rows=73049 width=1119) + TableScan [TS_68] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_332] 
@@ -244,22 +244,22 @@ Stage-0 predicate:((ss_customer_sk BETWEEN DynamicValue(RS_34_customer_c_customer_sk_min) AND DynamicValue(RS_34_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_34_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_21] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized + <-Reducer 22 [BROADCAST_EDGE] vectorized BROADCAST [RS_327] Group By Operator [GBY_326] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_304] Group By Operator [GBY_300] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_295] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_288] - <-Reducer 25 [BROADCAST_EDGE] vectorized + <-Reducer 26 [BROADCAST_EDGE] vectorized BROADCAST [RS_329] Group By Operator [GBY_328] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_322] Group By Operator [GBY_320] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] @@ -269,124 +269,124 @@ Stage-0 
<-Reducer 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_347] PartitionCols:_col0 - Select Operator [SEL_346] (rows=348477374 width=88) + Select Operator [SEL_346] (rows=29040539 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_345] (rows=29040539 width=135) + predicate:(_col7 > 0) + Select Operator [SEL_344] (rows=87121617 width=135) + Output:["_col0","_col7"] + Group By Operator [GBY_343] (rows=87121617 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_60] (rows=174243235 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_58] (rows=174243235 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_280] (rows=174243235 width=135) + Conds:RS_55._col1=RS_318._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_312] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_279] (rows=158402938 width=135) + Conds:RS_342._col0=RS_296._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_296] + PartitionCols:_col0 + Select Operator [SEL_289] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_286] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_68] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_342] + PartitionCols:_col0 + Select Operator 
[SEL_341] (rows=144002668 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_340] (rows=144002668 width=135) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_43] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_339] + Group By Operator [GBY_338] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_305] + Group By Operator [GBY_301] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_297] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_289] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_357] + PartitionCols:_col0 + Select Operator [SEL_356] (rows=348477374 width=88) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_345] (rows=348477374 width=88) + Group By Operator [GBY_355] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_61] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_83] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_60] (rows=696954748 width=88) + Group By Operator [GBY_82] (rows=696954748 width=88) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_58] (rows=696954748 width=88) + Select Operator [SEL_80] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_280] (rows=696954748 width=88) - Conds:RS_55._col1=RS_313._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 23 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_282] (rows=696954748 width=88) + Conds:RS_77._col1=RS_313._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 24 [SIMPLE_EDGE] vectorized SHUFFLE [RS_313] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_312] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_77] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_279] (rows=633595212 width=88) - Conds:RS_344._col0=RS_290._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_281] (rows=633595212 width=88) + Conds:RS_354._col0=RS_290._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 19 [SIMPLE_EDGE] vectorized SHUFFLE [RS_290] PartitionCols:_col0 Select Operator [SEL_287] (rows=36524 width=1119) Output:["_col0"] Filter Operator [FIL_284] (rows=36524 width=1119) predicate:((d_year = 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_46] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + Please refer to the previous TableScan [TS_68] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_354] PartitionCols:_col0 - Select Operator [SEL_343] (rows=575995635 width=88) + Select Operator [SEL_353] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_342] (rows=575995635 width=88) - 
predicate:((ss_customer_sk BETWEEN DynamicValue(RS_56_customer_c_customer_sk_min) AND DynamicValue(RS_56_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_56_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_43] (rows=575995635 width=88) + Filter Operator [FIL_352] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_78_customer_c_customer_sk_min) AND DynamicValue(RS_78_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_78_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_65] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_339] - Group By Operator [GBY_338] (rows=1 width=12) + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_349] + Group By Operator [GBY_348] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_302] Group By Operator [GBY_298] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_291] (rows=36524 width=1119) Output:["_col0"] Please 
refer to the previous Select Operator [SEL_287] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_341] - Group By Operator [GBY_340] (rows=1 width=12) + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_351] + Group By Operator [GBY_350] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_321] Group By Operator [GBY_319] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] Select Operator [SEL_314] (rows=80000000 width=860) Output:["_col0"] Please refer to the previous Select Operator [SEL_312] - <-Reducer 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_357] - PartitionCols:_col0 - Select Operator [SEL_356] (rows=29040539 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_355] (rows=29040539 width=135) - predicate:(_col7 > 0) - Select Operator [SEL_354] (rows=87121617 width=135) - Output:["_col0","_col7"] - Group By Operator [GBY_353] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_82] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_81] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_79] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_282] (rows=174243235 width=135) - 
Conds:RS_76._col1=RS_318._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_76] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_281] (rows=158402938 width=135) - Conds:RS_352._col0=RS_296._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_296] - PartitionCols:_col0 - Select Operator [SEL_289] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_286] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_46] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] - PartitionCols:_col0 - Select Operator [SEL_351] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_350] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_64] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_349] - Group By Operator [GBY_348] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_305] - Group By Operator [GBY_301] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_297] 
(rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_325] PartitionCols:_col0 @@ -403,7 +403,7 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_276] (rows=174243235 width=135) Conds:RS_12._col1=RS_315._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 23 [SIMPLE_EDGE] vectorized + <-Map 24 [SIMPLE_EDGE] vectorized SHUFFLE [RS_315] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_312] @@ -412,7 +412,7 @@ Stage-0 PartitionCols:_col1 Merge Join Operator [MERGEJOIN_275] (rows=158402938 width=135) Conds:RS_310._col0=RS_292._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] vectorized + <-Map 19 [SIMPLE_EDGE] vectorized SHUFFLE [RS_292] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_287] @@ -425,11 +425,11 @@ Stage-0 predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) TableScan [TS_0] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized + <-Reducer 21 [BROADCAST_EDGE] vectorized BROADCAST [RS_307] Group By Operator [GBY_306] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_303] Group By Operator [GBY_299] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/query4.q.out index 75d5423d47..3f663cdfeb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query4.q.out @@ -217,42 +217,42 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 26 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) -Map 11 <- Reducer 25 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE) -Map 15 <- Reducer 28 (BROADCAST_EDGE), Reducer 37 (BROADCAST_EDGE) -Map 19 <- Reducer 24 (BROADCAST_EDGE) -Map 38 <- Reducer 32 (BROADCAST_EDGE) -Map 7 <- Reducer 27 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE) +Map 1 <- Reducer 30 (BROADCAST_EDGE) +Map 11 <- Reducer 32 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE) +Map 15 <- Reducer 33 (BROADCAST_EDGE) +Map 19 <- Reducer 29 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE) +Map 23 <- Reducer 28 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) +Map 7 <- Reducer 31 (BROADCAST_EDGE), Reducer 37 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 13 <- Map 33 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 13 <- Map 34 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 17 <- Map 33 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 17 <- Map 34 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 21 <- Map 33 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 21 <- Map 34 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 23 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) -Reducer 3 <- Map 33 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 33 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Reducer 30 (SIMPLE_EDGE) -Reducer 32 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 33 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 33 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Map 33 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 33 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 25 <- Map 34 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE) +Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 34 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) +Reducer 36 <- Map 34 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Map 34 (CUSTOM_SIMPLE_EDGE) +Reducer 38 <- Map 34 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 23 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Map 33 (SIMPLE_EDGE), 
Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 27 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Map 34 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator @@ -262,26 +262,26 @@ Stage-0 File Output Operator [FS_587] Limit [LIM_586] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_585] (rows=212958399 width=88) + Select Operator [SEL_585] (rows=479156399 width=88) Output:["_col0"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_139] - Select Operator [SEL_138] (rows=212958399 width=88) + Select Operator [SEL_138] (rows=479156399 width=88) Output:["_col0"] - Filter Operator [FIL_136] (rows=212958399 width=88) - predicate:(((_col1 / _col8) > (_col10 / _col12)) and ((_col1 / _col8) > (_col6 / _col3))) + Filter Operator [FIL_136] (rows=479156399 width=88) + predicate:(CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col12 / _col3))) ELSE ((null > (_col12 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END and CASE WHEN (_col7 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col1 / _col7))) ELSE ((null > (_col1 / _col7))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END) Merge Join Operator [MERGEJOIN_470] (rows=1916625598 width=88) - Conds:RS_532._col0=RS_544._col0(Inner),RS_544._col0=RS_554._col0(Inner),RS_544._col0=RS_566._col0(Inner),RS_544._col0=RS_574._col0(Inner),RS_544._col0=RS_584._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col10","_col12"] + Conds:RS_530._col0=RS_542._col0(Inner),RS_542._col0=RS_554._col0(Inner),RS_542._col0=RS_564._col0(Inner),RS_542._col0=RS_574._col0(Inner),RS_542._col0=RS_584._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col9","_col11","_col12"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_544] + SHUFFLE [RS_542] PartitionCols:_col0 - Select Operator [SEL_543] (rows=116159124 width=88) + Select Operator [SEL_541] (rows=116159124 
width=88) Output:["_col0","_col1"] - Filter Operator [FIL_542] (rows=116159124 width=88) + Filter Operator [FIL_540] (rows=116159124 width=88) predicate:(_col7 > 0) - Select Operator [SEL_541] (rows=348477374 width=88) + Select Operator [SEL_539] (rows=348477374 width=88) Output:["_col0","_col7"] - Group By Operator [GBY_540] (rows=348477374 width=88) + Group By Operator [GBY_538] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_39] @@ -291,357 +291,357 @@ Stage-0 Select Operator [SEL_36] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_461] (rows=696954748 width=88) - Conds:RS_33._col1=RS_512._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_512] + Conds:RS_33._col1=RS_515._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_515] PartitionCols:_col0 - Select Operator [SEL_506] (rows=80000000 width=860) + Select Operator [SEL_509] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_505] (rows=80000000 width=860) + Filter Operator [FIL_508] (rows=80000000 width=860) predicate:(c_customer_id is not null and c_customer_sk is not null) - TableScan [TS_92] (rows=80000000 width=860) + TableScan [TS_114] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col1 Merge Join Operator 
[MERGEJOIN_460] (rows=633595212 width=88) - Conds:RS_539._col0=RS_485._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 23 [SIMPLE_EDGE] vectorized + Conds:RS_537._col0=RS_485._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 27 [SIMPLE_EDGE] vectorized SHUFFLE [RS_485] PartitionCols:_col0 Select Operator [SEL_476] (rows=36524 width=1119) Output:["_col0"] Filter Operator [FIL_472] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_89] (rows=73049 width=1119) + TableScan [TS_111] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_539] + SHUFFLE [RS_537] PartitionCols:_col0 - Select Operator [SEL_538] (rows=575995635 width=88) + Select Operator [SEL_536] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_537] (rows=575995635 width=88) + Filter Operator [FIL_535] (rows=575995635 width=88) predicate:((ss_customer_sk BETWEEN DynamicValue(RS_34_customer_c_customer_sk_min) AND DynamicValue(RS_34_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_34_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_21] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_534] - Group By Operator [GBY_533] (rows=1 width=12) + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_532] + Group By Operator [GBY_531] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_500] Group By Operator [GBY_494] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_486] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_476] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_536] - Group By Operator [GBY_535] (rows=1 width=12) + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_534] + Group By Operator [GBY_533] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 33 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_523] - Group By Operator [GBY_519] (rows=1 width=12) + <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_526] + Group By Operator [GBY_522] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_513] (rows=80000000 width=860) + Select Operator [SEL_516] (rows=80000000 width=860) Output:["_col0"] - Please refer to the previous Select Operator [SEL_506] + Please refer to the previous Select Operator [SEL_509] <-Reducer 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_554] PartitionCols:_col0 - Select Operator [SEL_553] (rows=348477374 width=88) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_552] (rows=348477374 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - 
Group By Operator [GBY_60] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_58] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_463] (rows=696954748 width=88) - Conds:RS_55._col1=RS_508._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_508] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_506] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_462] (rows=633595212 width=88) - Conds:RS_551._col0=RS_481._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_481] - PartitionCols:_col0 - Select Operator [SEL_475] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_471] (rows=36524 width=1119) - predicate:((d_year = 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_89] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_551] - PartitionCols:_col0 - Select Operator [SEL_550] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_549] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_56_customer_c_customer_sk_min) AND DynamicValue(RS_56_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_56_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_43] (rows=575995635 width=88) 
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_546] - Group By Operator [GBY_545] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_498] - Group By Operator [GBY_492] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_482] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_475] - <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_548] - Group By Operator [GBY_547] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 33 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_521] - Group By Operator [GBY_517] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_509] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_506] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_566] - PartitionCols:_col0 - Select Operator [SEL_565] (rows=58077952 width=135) + Select Operator [SEL_553] (rows=58077952 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_564] (rows=58077952 width=135) + Filter Operator [FIL_552] (rows=58077952 width=135) predicate:(_col7 > 0) - Select Operator [SEL_563] (rows=174233858 width=135) + Select Operator [SEL_551] (rows=174233858 width=135) Output:["_col0","_col7"] - Group By Operator [GBY_562] (rows=174233858 width=135) + Group By Operator [GBY_550] 
(rows=174233858 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_82] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_61] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_81] (rows=348467716 width=135) + Group By Operator [GBY_60] (rows=348467716 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_79] (rows=348467716 width=135) + Select Operator [SEL_58] (rows=348467716 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_465] (rows=348467716 width=135) - Conds:RS_76._col1=RS_514._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_514] + Merge Join Operator [MERGEJOIN_463] (rows=348467716 width=135) + Conds:RS_55._col1=RS_517._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_517] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_506] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_76] + Please refer to the previous Select Operator [SEL_509] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_464] (rows=316788826 width=135) - Conds:RS_561._col0=RS_487._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 23 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_462] (rows=316788826 width=135) + Conds:RS_549._col0=RS_487._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 27 [SIMPLE_EDGE] vectorized SHUFFLE [RS_487] 
PartitionCols:_col0 Select Operator [SEL_477] (rows=36524 width=1119) Output:["_col0"] Filter Operator [FIL_473] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_89] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_561] + Please refer to the previous TableScan [TS_111] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_549] PartitionCols:_col0 - Select Operator [SEL_560] (rows=287989836 width=135) + Select Operator [SEL_548] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_559] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_77_customer_c_customer_sk_min) AND DynamicValue(RS_77_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_77_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_64] (rows=287989836 width=135) + Filter Operator [FIL_547] (rows=287989836 width=135) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_56_customer_c_customer_sk_min) AND DynamicValue(RS_56_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_56_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_43] (rows=287989836 width=135) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_556] - Group By Operator [GBY_555] (rows=1 width=12) + <-Reducer 32 [BROADCAST_EDGE] vectorized + BROADCAST [RS_544] + Group By Operator [GBY_543] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_501] Group By Operator [GBY_495] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_488] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_477] - <-Reducer 37 [BROADCAST_EDGE] vectorized - BROADCAST [RS_558] - Group By Operator [GBY_557] (rows=1 width=12) + <-Reducer 38 [BROADCAST_EDGE] vectorized + BROADCAST [RS_546] + Group By Operator [GBY_545] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 33 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_524] - Group By Operator [GBY_520] (rows=1 width=12) + <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_527] + Group By Operator [GBY_523] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_515] (rows=80000000 width=860) + Select Operator [SEL_518] (rows=80000000 width=860) Output:["_col0"] - Please refer to the previous Select Operator [SEL_506] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_574] - PartitionCols:_col0 - Select Operator [SEL_573] (rows=87121617 width=135) - Output:["_col0","_col1"] 
- Group By Operator [GBY_572] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_104] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_103] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_101] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_467] (rows=174243235 width=135) - Conds:RS_98._col1=RS_507._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_507] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_506] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_98] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_466] (rows=158402938 width=135) - Conds:RS_571._col0=RS_479._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_479] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_475] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_571] - PartitionCols:_col0 - Select Operator [SEL_570] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_569] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_96_date_dim_d_date_sk_min) AND DynamicValue(RS_96_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_96_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_86] (rows=144002668 width=135) - 
default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_568] - Group By Operator [GBY_567] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_497] - Group By Operator [GBY_491] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_480] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_475] - <-Reducer 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_584] + Please refer to the previous Select Operator [SEL_509] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_564] PartitionCols:_col0 - Select Operator [SEL_583] (rows=29040539 width=135) + Select Operator [SEL_563] (rows=29040539 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_582] (rows=29040539 width=135) + Filter Operator [FIL_562] (rows=29040539 width=135) predicate:(_col7 > 0) - Select Operator [SEL_581] (rows=87121617 width=135) + Select Operator [SEL_561] (rows=87121617 width=135) Output:["_col0","_col7"] - Group By Operator [GBY_580] (rows=87121617 width=135) + Group By Operator [GBY_560] (rows=87121617 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_125] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_83] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_124] (rows=174243235 width=135) + Group By Operator [GBY_82] (rows=174243235 width=135) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_122] (rows=174243235 width=135) + Select Operator [SEL_80] (rows=174243235 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_469] (rows=174243235 width=135) - Conds:RS_119._col1=RS_516._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_516] + Merge Join Operator [MERGEJOIN_465] (rows=174243235 width=135) + Conds:RS_77._col1=RS_519._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_519] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_506] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_119] + Please refer to the previous Select Operator [SEL_509] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_77] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_468] (rows=158402938 width=135) - Conds:RS_579._col0=RS_489._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 23 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_464] (rows=158402938 width=135) + Conds:RS_559._col0=RS_489._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 27 [SIMPLE_EDGE] vectorized SHUFFLE [RS_489] PartitionCols:_col0 Select Operator [SEL_478] (rows=36524 width=1119) Output:["_col0"] Filter Operator [FIL_474] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_89] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_579] + Please refer to the previous TableScan [TS_111] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_559] PartitionCols:_col0 - Select Operator [SEL_578] (rows=144002668 width=135) + Select 
Operator [SEL_558] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_577] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_117_date_dim_d_date_sk_min) AND DynamicValue(RS_117_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_117_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_107] (rows=144002668 width=135) + Filter Operator [FIL_557] (rows=144002668 width=135) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_65] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_576] - Group By Operator [GBY_575] (rows=1 width=12) + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_556] + Group By Operator [GBY_555] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_502] Group By Operator [GBY_496] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_490] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_478] + <-Reducer 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_574] + PartitionCols:_col0 + Select Operator [SEL_573] (rows=174233858 width=135) + Output:["_col0","_col1"] + Group 
By Operator [GBY_572] (rows=174233858 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_104] (rows=348467716 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_102] (rows=348467716 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_467] (rows=348467716 width=135) + Conds:RS_99._col1=RS_512._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_512] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_509] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_99] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_466] (rows=316788826 width=135) + Conds:RS_571._col0=RS_481._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_481] + PartitionCols:_col0 + Select Operator [SEL_475] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_471] (rows=36524 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null) + Please refer to the previous TableScan [TS_111] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_571] + PartitionCols:_col0 + Select Operator [SEL_570] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_569] (rows=287989836 width=135) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_100_customer_c_customer_sk_min) AND DynamicValue(RS_100_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, 
DynamicValue(RS_100_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_87] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_566] + Group By Operator [GBY_565] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_498] + Group By Operator [GBY_492] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_482] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_475] + <-Reducer 36 [BROADCAST_EDGE] vectorized + BROADCAST [RS_568] + Group By Operator [GBY_567] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] + <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_525] + Group By Operator [GBY_521] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] + Select Operator [SEL_513] (rows=80000000 width=860) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_509] + <-Reducer 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_584] + PartitionCols:_col0 + Select Operator [SEL_583] (rows=348477374 width=88) + Output:["_col0","_col1","_col2"] + Group By 
Operator [GBY_582] (rows=348477374 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_126] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_125] (rows=696954748 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_123] (rows=696954748 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_469] (rows=696954748 width=88) + Conds:RS_120._col1=RS_510._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_510] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_509] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_120] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_468] (rows=633595212 width=88) + Conds:RS_581._col0=RS_479._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_479] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_475] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_581] + PartitionCols:_col0 + Select Operator [SEL_580] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_579] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_121_customer_c_customer_sk_min) AND DynamicValue(RS_121_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_121_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_118_date_dim_d_date_sk_min) AND DynamicValue(RS_118_date_dim_d_date_sk_max) and 
in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_118_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_108] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_576] + Group By Operator [GBY_575] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_497] + Group By Operator [GBY_491] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_480] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_475] + <-Reducer 35 [BROADCAST_EDGE] vectorized + BROADCAST [RS_578] + Group By Operator [GBY_577] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] + <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_524] + Group By Operator [GBY_520] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] + Select Operator [SEL_511] (rows=80000000 width=860) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_509] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_532] + SHUFFLE [RS_530] PartitionCols:_col0 - Select Operator [SEL_531] (rows=174233858 width=135) + Select Operator [SEL_529] (rows=87121617 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_530] (rows=174233858 width=135) + Group By Operator [GBY_528] (rows=87121617 width=135) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_17] (rows=348467716 width=135) + Group By Operator [GBY_17] (rows=174243235 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_15] (rows=348467716 width=135) + Select Operator [SEL_15] (rows=174243235 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_459] (rows=348467716 width=135) - Conds:RS_12._col1=RS_510._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_510] + Merge Join Operator [MERGEJOIN_459] (rows=174243235 width=135) + Conds:RS_12._col1=RS_514._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_514] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_506] + Please refer to the previous Select Operator [SEL_509] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_458] (rows=316788826 width=135) - Conds:RS_529._col0=RS_483._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 23 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_458] (rows=158402938 width=135) + Conds:RS_507._col0=RS_483._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 27 [SIMPLE_EDGE] vectorized SHUFFLE [RS_483] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_475] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_529] + SHUFFLE 
[RS_507] PartitionCols:_col0 - Select Operator [SEL_528] (rows=287989836 width=135) + Select Operator [SEL_506] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_527] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_13_customer_c_customer_sk_min) AND DynamicValue(RS_13_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_13_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 26 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_505] (rows=144002668 width=135) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 30 [BROADCAST_EDGE] vectorized BROADCAST [RS_504] Group By Operator [GBY_503] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_499] Group By Operator 
[GBY_493] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_484] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_475] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_526] - Group By Operator [GBY_525] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 33 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_522] - Group By Operator [GBY_518] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_511] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_506] diff --git a/ql/src/test/results/clientpositive/perf/tez/query74.q.out b/ql/src/test/results/clientpositive/perf/tez/query74.q.out index 738e64f799..854e6dc3aa 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query74.q.out @@ -121,10 +121,10 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 22 (BROADCAST_EDGE) -Map 11 <- Reducer 21 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE) -Map 15 <- Reducer 20 (BROADCAST_EDGE) -Map 7 <- Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE) +Map 1 <- Reducer 21 (BROADCAST_EDGE) +Map 11 <- Reducer 23 (BROADCAST_EDGE) +Map 15 <- Reducer 20 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE) +Map 7 <- Reducer 22 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) @@ -154,36 +154,36 @@ Stage-0 File Output Operator [FS_356] Limit [LIM_355] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_354] (rows=383325119 width=88) + Select Operator [SEL_354] (rows=574987679 width=88) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_89] - Select Operator [SEL_88] (rows=383325119 width=88) + Select Operator [SEL_88] (rows=574987679 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_87] (rows=383325119 width=88) - predicate:((_col9 / _col1) > (_col7 / _col3)) + Filter Operator [FIL_87] (rows=574987679 width=88) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE ((null > (_col9 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END Merge Join Operator [MERGEJOIN_279] (rows=1149975359 width=88) - Conds:RS_323._col0=RS_335._col0(Inner),RS_335._col0=RS_345._col0(Inner),RS_335._col0=RS_353._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9"] + Conds:RS_321._col0=RS_333._col0(Inner),RS_333._col0=RS_343._col0(Inner),RS_333._col0=RS_353._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col9"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_335] + SHUFFLE [RS_333] PartitionCols:_col0 - Select Operator [SEL_334] (rows=116159124 
width=88) + Select Operator [SEL_332] (rows=116159124 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_333] (rows=116159124 width=88) + Filter Operator [FIL_331] (rows=116159124 width=88) predicate:(_col4 > 0) - Select Operator [SEL_332] (rows=348477374 width=88) + Select Operator [SEL_330] (rows=348477374 width=88) Output:["_col0","_col4"] - Group By Operator [GBY_331] (rows=348477374 width=88) + Group By Operator [GBY_329] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_38] + SHUFFLE [RS_37] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_37] (rows=696954748 width=88) + Group By Operator [GBY_36] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 Merge Join Operator [MERGEJOIN_274] (rows=696954748 width=88) - Conds:RS_33._col1=RS_313._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] + Conds:RS_32._col1=RS_312._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] + SHUFFLE [RS_312] PartitionCols:_col0 Select Operator [SEL_308] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3"] @@ -192,212 +192,212 @@ Stage-0 TableScan [TS_68] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_33] + SHUFFLE [RS_32] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_273] (rows=633595212 width=88) - Conds:RS_330._col0=RS_292._col0(Inner),Output:["_col1","_col2","_col4"] + Conds:RS_328._col0=RS_290._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] + SHUFFLE [RS_290] PartitionCols:_col0 - Select Operator [SEL_285] (rows=18262 width=1119) + Select Operator [SEL_284] 
(rows=18262 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_282] (rows=18262 width=1119) + Filter Operator [FIL_281] (rows=18262 width=1119) predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) TableScan [TS_65] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] + SHUFFLE [RS_328] PartitionCols:_col0 - Select Operator [SEL_329] (rows=575995635 width=88) + Select Operator [SEL_327] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_328] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_34_customer_c_customer_sk_min) AND DynamicValue(RS_34_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_34_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_21] (rows=575995635 width=88) + Filter Operator [FIL_326] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_33_customer_c_customer_sk_min) AND DynamicValue(RS_33_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_33_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_20] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_325] - Group By Operator [GBY_324] 
(rows=1 width=12) + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_323] + Group By Operator [GBY_322] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] - Group By Operator [GBY_297] (rows=1 width=12) + SHUFFLE [RS_300] + Group By Operator [GBY_296] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_293] (rows=18262 width=1119) + Select Operator [SEL_291] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_285] + Please refer to the previous Select Operator [SEL_284] <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_327] - Group By Operator [GBY_326] (rows=1 width=12) + BROADCAST [RS_325] + Group By Operator [GBY_324] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_318] Group By Operator [GBY_316] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_314] (rows=80000000 width=860) + Select Operator [SEL_313] (rows=80000000 width=860) Output:["_col0"] Please refer to the previous Select Operator [SEL_308] <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] + SHUFFLE [RS_343] + PartitionCols:_col0 + Select Operator [SEL_342] (rows=29040539 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_341] (rows=29040539 width=135) + predicate:(_col4 > 0) + Select Operator [SEL_340] (rows=87121617 width=135) + Output:["_col0","_col4"] + Group By Operator [GBY_339] (rows=87121617 width=135) + 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_57] (rows=174243235 width=135) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 + Merge Join Operator [MERGEJOIN_276] (rows=174243235 width=135) + Conds:RS_53._col1=RS_314._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_308] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_275] (rows=158402938 width=135) + Conds:RS_338._col0=RS_292._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] + PartitionCols:_col0 + Select Operator [SEL_285] (rows=18262 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_282] (rows=18262 width=1119) + predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) + Please refer to the previous TableScan [TS_65] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_338] + PartitionCols:_col0 + Select Operator [SEL_337] (rows=144002668 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_336] (rows=144002668 width=135) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_41] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_335] + Group By Operator [GBY_334] (rows=1 
width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] + Group By Operator [GBY_297] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_293] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_285] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_353] PartitionCols:_col0 - Select Operator [SEL_344] (rows=348477374 width=88) + Select Operator [SEL_352] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_343] (rows=348477374 width=88) + Group By Operator [GBY_351] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_59] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_79] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_58] (rows=696954748 width=88) + Group By Operator [GBY_78] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 - Merge Join Operator [MERGEJOIN_276] (rows=696954748 width=88) - Conds:RS_54._col1=RS_310._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_278] (rows=696954748 width=88) + Conds:RS_74._col1=RS_309._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] + SHUFFLE [RS_309] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_308] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_54] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_275] (rows=633595212 width=88) - 
Conds:RS_342._col0=RS_288._col0(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_277] (rows=633595212 width=88) + Conds:RS_350._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_288] + SHUFFLE [RS_286] PartitionCols:_col0 Select Operator [SEL_283] (rows=18262 width=1119) Output:["_col0","_col1"] Filter Operator [FIL_280] (rows=18262 width=1119) predicate:((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) Please refer to the previous TableScan [TS_65] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_350] PartitionCols:_col0 - Select Operator [SEL_341] (rows=575995635 width=88) + Select Operator [SEL_349] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_340] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_55_customer_c_customer_sk_min) AND DynamicValue(RS_55_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_55_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_42] (rows=575995635 width=88) + Filter Operator [FIL_348] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_75_customer_c_customer_sk_min) AND DynamicValue(RS_75_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_75_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_72_date_dim_d_date_sk_min) AND DynamicValue(RS_72_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_72_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan 
[TS_62] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_337] - Group By Operator [GBY_336] (rows=1 width=12) + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_345] + Group By Operator [GBY_344] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_299] - Group By Operator [GBY_295] (rows=1 width=12) + SHUFFLE [RS_298] + Group By Operator [GBY_294] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_289] (rows=18262 width=1119) + Select Operator [SEL_287] (rows=18262 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_283] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_339] - Group By Operator [GBY_338] (rows=1 width=12) + BROADCAST [RS_347] + Group By Operator [GBY_346] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_317] Group By Operator [GBY_315] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_311] (rows=80000000 width=860) + Select Operator [SEL_310] (rows=80000000 width=860) Output:["_col0"] Please refer to the previous Select Operator [SEL_308] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_353] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] PartitionCols:_col0 - Select Operator [SEL_352] (rows=87121617 width=135) + Select Operator [SEL_320] (rows=87121617 width=135) Output:["_col0","_col1"] - Group By Operator 
[GBY_351] (rows=87121617 width=135) + Group By Operator [GBY_319] (rows=87121617 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_79] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_78] (rows=174243235 width=135) + Group By Operator [GBY_16] (rows=174243235 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 - Merge Join Operator [MERGEJOIN_278] (rows=174243235 width=135) - Conds:RS_74._col1=RS_309._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_272] (rows=174243235 width=135) + Conds:RS_12._col1=RS_311._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] + SHUFFLE [RS_311] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_308] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_74] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_277] (rows=158402938 width=135) - Conds:RS_350._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_271] (rows=158402938 width=135) + Conds:RS_306._col0=RS_288._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + SHUFFLE [RS_288] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_283] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_306] PartitionCols:_col0 - Select Operator [SEL_349] (rows=144002668 width=135) + Select Operator [SEL_305] (rows=144002668 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_348] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_72_date_dim_d_date_sk_min) AND 
DynamicValue(RS_72_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_72_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_62] (rows=144002668 width=135) + Filter Operator [FIL_304] (rows=144002668 width=135) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_347] - Group By Operator [GBY_346] (rows=1 width=12) + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_303] + Group By Operator [GBY_302] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] - Group By Operator [GBY_294] (rows=1 width=12) + SHUFFLE [RS_299] + Group By Operator [GBY_295] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_287] (rows=18262 width=1119) + Select Operator [SEL_289] (rows=18262 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_283] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] - PartitionCols:_col0 - Select Operator [SEL_322] (rows=29040539 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=29040539 width=135) - predicate:(_col4 > 0) - Select Operator [SEL_320] (rows=87121617 width=135) - Output:["_col0","_col4"] - Group By Operator [GBY_319] (rows=87121617 width=135) - 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_16] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 - Merge Join Operator [MERGEJOIN_272] (rows=174243235 width=135) - Conds:RS_12._col1=RS_312._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_308] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_271] (rows=158402938 width=135) - Conds:RS_306._col0=RS_290._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] - PartitionCols:_col0 - Select Operator [SEL_284] (rows=18262 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_281] (rows=18262 width=1119) - predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_65] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - PartitionCols:_col0 - Select Operator [SEL_305] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_304] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_303] - Group By Operator [GBY_302] (rows=1 
width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] - Group By Operator [GBY_296] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_291] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_284] diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out index 7509697988..df4e4768be 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out @@ -468,25 +468,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table3_n0 - filterExpr: (id = 100) (type: boolean) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id = 100) (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 100 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: boolean) - mode: hash + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 100 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 
_col0 (type: int), _col1 (type: boolean) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col1 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -497,10 +498,10 @@ STAGE PLANS: 0 100 (type: int), true (type: boolean) 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -655,25 +656,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table3_n0 - filterExpr: (id = 100) (type: boolean) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id = 100) (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 100 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE 
- Group By Operator - keys: _col0 (type: int), _col1 (type: boolean) - mode: hash + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 100 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: boolean) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col1 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -684,10 +686,10 @@ STAGE PLANS: 0 100 (type: int), true (type: boolean) 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index 11c7891d41..83437e5593 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -1927,7 +1927,7 @@ POSTHOOK: query: explain extended select key, value from pcr_t1 where ds>='2000- POSTHOOK: type: QUERY OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`pcr_t1` -WHERE `ds` >= '2000-04-08' OR `ds` < '2000-04-10' +WHERE `ds` >= '2000-04-08' OR `ds` IS NOT NULL ORDER BY `key`, `value` STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1944,7 +1944,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - filterExpr: ((ds >= '2000-04-08') or (ds < '2000-04-10')) (type: boolean) + filterExpr: ((ds >= '2000-04-08') or ds is not null) (type: boolean) Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index b488129cd0..c5d0d63f8c 100644 --- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -428,24 +428,24 @@ Stage-0 limit:-1 Stage-1 Reducer 4 - File Output Operator [FS_32] - Select Operator [SEL_30] (rows=1 width=20) + File Output Operator [FS_33] + Select Operator [SEL_31] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SORT] - SORT [RS_29] - Select Operator [SEL_28] (rows=1 width=28) + SORT [RS_30] + Select Operator [SEL_29] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_27] (rows=1 width=20) + Group By Operator [GBY_28] (rows=1 width=20) 
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [GROUP] - GROUP [RS_26] + GROUP [RS_27] PartitionCols:_col0, _col1 - Group By Operator [GBY_25] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_24] (rows=1 width=20) + Select Operator [SEL_25] (rows=1 width=20) Output:["_col1","_col4"] Filter Operator [FIL_21] (rows=1 width=20) - predicate:(((UDFToLong(_col1) + _col4) >= 0) and ((_col1 >= 1) or (_col4 >= 1L)) and ((_col3 + _col6) >= 0)) + predicate:(((UDFToLong(_col1) + _col4) >= 0) and ((_col1 >= 1) or (_col4 >= 1L)) and ((_col3 + _col6) >= 0) and ((_col3 > 0) or _col1 is not null)) Join Operator [JOIN_20] (rows=3 width=18) Output:["_col1","_col3","_col4","_col6"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] @@ -453,7 +453,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_33] (rows=18 width=84) + Filter Operator [FIL_34] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] @@ -469,8 +469,8 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_34] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + Filter Operator [FIL_35] (rows=2 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int 
>= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [PARTITION-LEVEL SORT] @@ -485,8 +485,8 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + Filter Operator [FIL_36] (rows=2 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -543,7 +543,7 @@ Stage-0 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_30] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [PARTITION-LEVEL SORT] @@ -559,7 +559,7 @@ Stage-0 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_31] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 
1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -581,22 +581,22 @@ Stage-0 limit:-1 Stage-1 Reducer 4 - File Output Operator [FS_31] - Select Operator [SEL_30] (rows=1 width=20) + File Output Operator [FS_32] + Select Operator [SEL_31] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SORT] - SORT [RS_29] - Group By Operator [GBY_27] (rows=1 width=20) + SORT [RS_30] + Group By Operator [GBY_28] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [GROUP] - GROUP [RS_26] + GROUP [RS_27] PartitionCols:_col0, _col1 - Group By Operator [GBY_25] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_24] (rows=1 width=20) + Select Operator [SEL_25] (rows=1 width=20) Output:["_col1","_col4"] Filter Operator [FIL_21] (rows=1 width=20) - predicate:(((UDFToLong(_col1) + _col4) >= 0) and ((_col1 >= 1) or (_col4 >= 1L)) and ((_col3 + _col6) >= 0)) + predicate:(((UDFToLong(_col1) + _col4) >= 0) and ((_col1 >= 1) or (_col4 >= 1L)) and ((_col3 + _col6) >= 0) and ((_col3 > 0) or _col1 is not null)) Join Operator [JOIN_20] (rows=3 width=18) Output:["_col1","_col3","_col4","_col6"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] @@ -604,7 +604,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=18 width=84) + Filter Operator [FIL_33] (rows=18 width=84) 
predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] @@ -620,8 +620,8 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_33] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + Filter Operator [FIL_34] (rows=2 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [PARTITION-LEVEL SORT] @@ -636,8 +636,8 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_34] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + Filter Operator [FIL_35] (rows=2 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -694,7 +694,7 @@ Stage-0 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_30] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) 
and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [PARTITION-LEVEL SORT] @@ -710,7 +710,7 @@ Stage-0 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_31] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1537,7 +1537,7 @@ Stage-0 Group By Operator [GBY_3] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_35] (rows=1 width=93) - predicate:((((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0D)) and ((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) + predicate:((((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0D)) and ((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) TableScan [TS_0] (rows=20 width=88) 
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [PARTITION-LEVEL SORT] @@ -1555,7 +1555,7 @@ Stage-0 Group By Operator [GBY_12] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float Filter Operator [FIL_36] (rows=1 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) TableScan [TS_9] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out index f90b353818..0a76e62438 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -859,7 +859,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 Statistics: Num rows: 1 Data size: 39416 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col10 = 0L) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) + predicate: ((_col10 = 0L) or (_col13 is null and (_col11 >= _col10))) (type: boolean) Statistics: Num rows: 1 Data size: 39416 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index 
9105f587c0..e2d51e85cf 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -1788,15 +1788,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 28 Data size: 3601 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col5 BETWEEN CASE WHEN (_col10 is null) THEN (null) ELSE (_col9) END AND _col12 (type: boolean) - Statistics: Num rows: 3 Data size: 385 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col10 is null) THEN (_col5 BETWEEN null AND _col12) ELSE (_col5 BETWEEN _col9 AND _col12) END (type: boolean) + Statistics: Num rows: 14 Data size: 1800 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 385 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 385 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2307,15 +2307,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col5) <> CASE WHEN (_col10 is null) THEN (0) ELSE (_col9) END) (type: boolean) - Statistics: Num rows: 28 Data 
size: 3461 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col10 is null) THEN ((UDFToLong(_col5) <> 0)) ELSE ((UDFToLong(_col5) <> _col9)) END (type: boolean) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4624,15 +4624,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col2) <> CASE WHEN (_col11 is null) THEN (0) ELSE (_col10) END) (type: boolean) - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col11 is null) THEN ((UDFToLong(_col2) <> 0)) ELSE ((UDFToLong(_col2) <> _col10)) END (type: boolean) + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4886,17 +4886,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col2) <> CASE WHEN (_col11 is null) THEN (0) ELSE (_col10) END) (type: boolean) - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col11 is null) THEN ((UDFToLong(_col2) <> 0)) ELSE ((UDFToLong(_col2) <> _col10)) END (type: boolean) + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 
(type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) Reducer 3 Reduce Operator Tree: @@ -4907,9 +4907,9 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14 - Statistics: Num rows: 5 Data size: 287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 114 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col0) > CASE WHEN (_col14 is null) THEN (0) ELSE (_col13) END) (type: boolean) + predicate: CASE WHEN (_col14 is null) THEN ((UDFToLong(_col0) > 0)) ELSE ((UDFToLong(_col0) > _col13)) END (type: boolean) Statistics: Num rows: 1 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) @@ -5913,15 +5913,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 185 Data size: 45180 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col5) <> CASE WHEN (_col10 is null) THEN (0) ELSE (_col9) END) (type: boolean) - Statistics: Num rows: 185 Data size: 45180 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col10 is null) THEN ((UDFToLong(_col5) <> 0)) ELSE ((UDFToLong(_col5) <> _col9)) END (type: boolean) + Statistics: Num rows: 92 Data size: 22467 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 185 Data size: 45180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 92 Data size: 22467 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 185 Data size: 45180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 92 Data size: 22467 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/subquery_views.q.out b/ql/src/test/results/clientpositive/spark/subquery_views.q.out index 67711c234e..54a8894ffd 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_views.q.out @@ -337,7 +337,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4, _col5, _col8 Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) Statistics: Num rows: 182 Data size: 1933 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 028351be0c..6d5fefb9ff 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -1239,7 +1239,7 @@ where (case when cint % 2 = 0 then cint 
else 0 end) = cint) a POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -0 +4086 PREHOOK: query: select count(*) from ( select diff --git a/ql/src/test/results/clientpositive/stat_estimate_related_col.q.out b/ql/src/test/results/clientpositive/stat_estimate_related_col.q.out index 78524d6716..669adafda3 100644 --- a/ql/src/test/results/clientpositive/stat_estimate_related_col.q.out +++ b/ql/src/test/results/clientpositive/stat_estimate_related_col.q.out @@ -594,10 +594,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t8 - filterExpr: ((b = 2) and ((b = 1) or (b = 2))) (type: boolean) + filterExpr: (b = 2) (type: boolean) Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((b = 1) or (b = 2)) and (b = 2)) (type: boolean) + predicate: (b = 2) (type: boolean) Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) @@ -663,27 +663,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t8 - filterExpr: ((b = 2) and ((b = 1) or (b = 2)) and ((b = 1) or (b = 3))) (type: boolean) - Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40/1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((b = 1) or (b = 2)) and ((b = 1) or (b = 3)) and (b = 2)) (type: boolean) - Statistics: Num rows: 8/0 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 8/0 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: 2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + predicate: false (type: boolean) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(a) + keys: b 
(type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -748,10 +743,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t8 - filterExpr: ((b = 2) and ((b = 1) or (b = 2)) and (a = 3) and ((a = 3) or (a = 4))) (type: boolean) + filterExpr: ((b = 2) and (a = 3)) (type: boolean) Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((a = 3) or (a = 4)) and ((b = 1) or (b = 2)) and (a = 3) and (b = 2)) (type: boolean) + predicate: ((a = 3) and (b = 2)) (type: boolean) Statistics: Num rows: 8/0 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 8/0 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/union_offcbo.q.out b/ql/src/test/results/clientpositive/union_offcbo.q.out index ce27bf2932..54c4edfa7c 100644 --- a/ql/src/test/results/clientpositive/union_offcbo.q.out +++ b/ql/src/test/results/clientpositive/union_offcbo.q.out @@ -632,10 +632,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((NVL(_col0,-1) <> NVL(_col7,-1)) or (NVL(_col1,-1) <> NVL(_col8,-1))) and CASE WHEN ((_col0 is null 
and (_col3 >= '2016-02-05') and _col7 is not null)) THEN (true) WHEN ((_col0 is null and (_col3 <= '2016-02-05') and _col7 is not null)) THEN (false) WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN (true) ELSE (true) END) (type: boolean) + predicate: (((NVL(_col0,-1) <> NVL(_col7,-1)) or (NVL(_col1,-1) <> NVL(_col8,-1))) and CASE WHEN ((_col0 is null and (_col3 >= '2016-02-05'))) THEN (true) WHEN ((_col0 is null and (_col3 <= '2016-02-05'))) THEN (false) WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN (true) ELSE (true) END) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col4 (type: string), _col7 (type: string), _col8 (type: string), CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END (type: string) + expressions: _col2 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col4 (type: string), _col7 (type: string), _col8 (type: string), CASE WHEN ((_col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -708,29 +708,29 @@ STAGE PLANS: predicate: ('2015-11-20' BETWEEN dt1 AND dt2 and khash is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: ts1 (type: string), khash (type: string), rhash (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: khash (type: string), rhash (type: string) + 
outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col3 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((NVL(_col3,-1) <> NVL(_col6,-1)) or (NVL(_col4,-1) <> NVL(_col7,-1))) (type: boolean) + predicate: ((NVL(_col3,-1) <> NVL(_col5,-1)) or (NVL(_col4,-1) <> NVL(_col6,-1))) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), '2099-12-31' (type: string), _col3 (type: string), _col4 (type: string), CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END (type: string) + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), '2099-12-31' (type: string), _col3 (type: string), _col4 (type: string), CASE WHEN ((_col5 is null or ((_col5 = _col3) and (_col6 <> _col4)))) THEN ('INS') ELSE ('NA') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File 
Output Operator @@ -1683,10 +1683,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((NVL(_col0,-1) <> NVL(_col7,-1)) or (NVL(_col1,-1) <> NVL(_col8,-1))) and CASE WHEN ((_col0 is null and (_col3 >= '2016-02-05') and _col7 is not null)) THEN (true) WHEN ((_col0 is null and (_col3 <= '2016-02-05') and _col7 is not null)) THEN (false) WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN (true) ELSE (true) END) (type: boolean) + predicate: (((NVL(_col0,-1) <> NVL(_col7,-1)) or (NVL(_col1,-1) <> NVL(_col8,-1))) and CASE WHEN ((_col0 is null and (_col3 >= '2016-02-05'))) THEN (true) WHEN ((_col0 is null and (_col3 <= '2016-02-05'))) THEN (false) WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN (true) ELSE (true) END) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col4 (type: string), _col7 (type: string), _col8 (type: string), CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END (type: string) + expressions: _col2 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col4 (type: string), _col7 (type: string), _col8 (type: string), CASE WHEN ((_col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -1757,29 +1757,29 @@ STAGE PLANS: predicate: 
('2015-11-20' BETWEEN dt1 AND dt2 and khash is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: ts1 (type: string), khash (type: string), rhash (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: khash (type: string), rhash (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col3 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((NVL(_col3,-1) <> NVL(_col6,-1)) or (NVL(_col4,-1) <> NVL(_col7,-1))) (type: boolean) + predicate: ((NVL(_col3,-1) <> NVL(_col5,-1)) or (NVL(_col4,-1) <> NVL(_col6,-1))) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), '2099-12-31' (type: string), _col3 (type: string), _col4 (type: string), CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END (type: string) + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: 
bigint), '2099-12-31' (type: string), _col3 (type: string), _col4 (type: string), CASE WHEN ((_col5 is null or ((_col5 = _col3) and (_col6 <> _col4)))) THEN ('INS') ELSE ('NA') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator diff --git a/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java b/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java index 3894e09a4f..6325d5d1db 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java +++ b/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java @@ -38,7 +38,6 @@ * YYYY-MM-DD * */ -@Deprecated public class DateWritable implements WritableComparable { private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1);