diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java index 13c38560d5..90ffddba0d 100644 --- common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java +++ common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java @@ -143,7 +143,7 @@ public static ZoneId parseTimeZone(String timeZoneStr) { return ZoneId.of(timeZoneStr); } catch (DateTimeException e1) { // default - throw new RuntimeException("Invalid time zone displacement value"); + throw new RuntimeException("Invalid time zone displacement value", e1); } } diff --git common/src/java/org/apache/hadoop/hive/conf/Constants.java common/src/java/org/apache/hadoop/hive/conf/Constants.java index 10aaee182f..3f976212e3 100644 --- common/src/java/org/apache/hadoop/hive/conf/Constants.java +++ common/src/java/org/apache/hadoop/hive/conf/Constants.java @@ -38,6 +38,8 @@ public static final String DRUID_TIMESTAMP_GRANULARITY_COL_NAME = "__time_granularity"; public static final String DRUID_SHARD_KEY_COL_NAME = "__druid_extra_partition_key"; public static final String DRUID_QUERY_JSON = "druid.query.json"; + public static final String DRUID_QUERY_FIELD_NAMES = "druid.fieldNames"; + public static final String DRUID_QUERY_FIELD_TYPES = "druid.fieldTypes"; public static final String DRUID_QUERY_TYPE = "druid.query.type"; public static final String DRUID_QUERY_FETCH = "druid.query.fetch"; public static final String DRUID_SEGMENT_DIRECTORY = "druid.storage.storageDirectory"; diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java index 2f956b179b..44be795a60 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java @@ -35,10 +35,18 @@ import io.druid.metadata.MetadataStorageTablesConfig; import io.druid.metadata.SQLMetadataConnector; import io.druid.metadata.storage.mysql.MySQLConnector; +import io.druid.query.expression.LikeExprMacro; +import io.druid.query.expression.RegexpExtractExprMacro; +import io.druid.query.expression.TimestampCeilExprMacro; +import io.druid.query.expression.TimestampExtractExprMacro; +import io.druid.query.expression.TimestampFloorExprMacro; +import io.druid.query.expression.TimestampFormatExprMacro; +import io.druid.query.expression.TimestampParseExprMacro; +import io.druid.query.expression.TimestampShiftExprMacro; +import io.druid.query.expression.TrimExprMacro; import io.druid.query.select.SelectQueryConfig; import io.druid.segment.IndexIO; import io.druid.segment.IndexMergerV9; -import io.druid.segment.column.ColumnConfig; import io.druid.segment.loading.DataSegmentPusher; import io.druid.segment.realtime.appenderator.SegmentIdentifier; import io.druid.storage.hdfs.HdfsDataSegmentPusher; @@ -100,7 +108,6 @@ import java.net.UnknownHostException; import java.sql.SQLException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -145,8 +152,21 @@ static { // This is needed for serde of PagingSpec as it uses JacksonInject for injecting SelectQueryConfig InjectableValues.Std injectableValues = new InjectableValues.Std() - .addValue(SelectQueryConfig.class, new SelectQueryConfig(false)) - .addValue(ExprMacroTable.class, ExprMacroTable.nil()); + .addValue(SelectQueryConfig.class, new 
SelectQueryConfig(false)) + // Expression macro table used when we deserialize the query from the Calcite plan + .addValue(ExprMacroTable.class, new ExprMacroTable(ImmutableList + .of(new LikeExprMacro(), + new RegexpExtractExprMacro(), + new TimestampCeilExprMacro(), + new TimestampExtractExprMacro(), + new TimestampFormatExprMacro(), + new TimestampParseExprMacro(), + new TimestampShiftExprMacro(), + new TimestampFloorExprMacro(), + new TrimExprMacro.BothTrimExprMacro(), + new TrimExprMacro.LeftTrimExprMacro(), + new TrimExprMacro.RightTrimExprMacro() + ))); JSON_MAPPER.setInjectableValues(injectableValues); SMILE_MAPPER.setInjectableValues(injectableValues); HiveDruidSerializationModule hiveDruidSerializationModule = new HiveDruidSerializationModule(); @@ -171,12 +191,7 @@ /** * Used by druid to perform IO on indexes */ - public static final IndexIO INDEX_IO = new IndexIO(JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() { - return 0; - } - }); + public static final IndexIO INDEX_IO = new IndexIO(JSON_MAPPER, () -> 0); /** * Used by druid to merge indexes @@ -327,19 +342,12 @@ public static void writeSegmentDescriptor( metadataStorageTablesConfig.getSegmentsTable() )) .fold(Lists.newArrayList(), - new Folder3<ArrayList<String>, Map<String, Object>>() { - @Override - public ArrayList<String> fold(ArrayList<String> druidDataSources, - Map<String, Object> stringObjectMap, - FoldController foldController, - StatementContext statementContext - ) throws SQLException { - druidDataSources.add( - MapUtils.getString(stringObjectMap, "datasource") - ); - return druidDataSources; - } - } + (druidDataSources, stringObjectMap, foldController, statementContext) -> { + druidDataSources.add( + MapUtils.getString(stringObjectMap, "datasource") + ); + return druidDataSources; + } ) ); } diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java index 33f6412380..c097a13194 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java @@ -132,6 +132,7 @@ public static DruidQueryRecordReader getDruidQueryReader(String druidQueryType) if (dataSource == null || dataSource.isEmpty()) { throw new IOException("Druid data source cannot be empty or null"); } + //@FIXME https://issues.apache.org/jira/browse/HIVE-19023 use Scan instead of Select druidQuery = createSelectStarQuery(dataSource); druidQueryType = Query.SELECT; } else { diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java index 2800f058a0..12b4f9d33b 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java @@ -132,7 +132,7 @@ public DruidWritable getCurrentValue() throws IOException, InterruptedException { // Create new value DruidWritable value = new DruidWritable(); // 1) The timestamp column - value.getValue().put(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, currentRow.getTimestamp().getMillis()); + value.getValue().put("timestamp", currentRow.getTimestamp().getMillis()); // 2) The dimension columns value.getValue().putAll(currentEvent); return value; @@ -144,7 +144,7 @@ public boolean next(NullWritable key, DruidWritable value) { // Update value
value.getValue().clear(); // 1) The timestamp column - value.getValue().put(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, currentRow.getTimestamp().getMillis()); + value.getValue().put("timestamp", currentRow.getTimestamp().getMillis()); // 2) The dimension columns value.getValue().putAll(currentEvent); return true; diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java index 914954da6f..9b8a26e940 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java @@ -19,15 +19,21 @@ import java.io.IOException; import java.io.InputStream; +import java.sql.Timestamp; import java.time.Instant; import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Properties; +import java.util.stream.Collectors; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -50,6 +56,7 @@ import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -72,6 +79,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; @@ -85,24 +93,13 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import io.druid.query.Druids; import io.druid.query.Druids.SegmentMetadataQueryBuilder; -import io.druid.query.Query; -import io.druid.query.aggregation.AggregatorFactory; -import io.druid.query.aggregation.PostAggregator; -import io.druid.query.dimension.DimensionSpec; -import io.druid.query.groupby.GroupByQuery; import io.druid.query.metadata.metadata.ColumnAnalysis; import io.druid.query.metadata.metadata.SegmentAnalysis; import io.druid.query.metadata.metadata.SegmentMetadataQuery; -import io.druid.query.scan.ScanQuery; -import io.druid.query.select.SelectQuery; -import io.druid.query.timeseries.TimeseriesQuery; -import io.druid.query.topn.TopNQuery; /** * DruidSerDe that is used to deserialize objects from a Druid data source. 
@@ -115,169 +112,151 @@ private String[] columns; private PrimitiveTypeInfo[] types; private ObjectInspector inspector; + private TimestampLocalTZTypeInfo tsTZTypeInfo; @Override public void initialize(Configuration configuration, Properties properties) throws SerDeException { - final List<String> columnNames = new ArrayList<>(); - final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>(); - List<ObjectInspector> inspectors = new ArrayList<>(); - - final TimestampLocalTZTypeInfo tsTZTypeInfo = new TimestampLocalTZTypeInfo( + tsTZTypeInfo = new TimestampLocalTZTypeInfo( configuration.get(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE.varname)); - // Druid query - String druidQuery = properties.getProperty(Constants.DRUID_QUERY_JSON); - if (druidQuery == null) { - // No query. Either it is a CTAS, or we need to create a Druid - // Segment Metadata query that retrieves all columns present in - // the data source (dimensions and metrics). + final String druidQuery = properties.getProperty(Constants.DRUID_QUERY_JSON, null); + if (druidQuery != null && !druidQuery.isEmpty()) { + initFromDruidQueryPlan(properties, druidQuery); + } else { + // No query. Either it is a CTAS, or we need to create a Druid metadata query if (!org.apache.commons.lang3.StringUtils .isEmpty(properties.getProperty(serdeConstants.LIST_COLUMNS)) && !org.apache.commons.lang3.StringUtils .isEmpty(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES))) { - columnNames.addAll(Utilities.getColumnNames(properties)); - if (!columnNames.contains(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) { - throw new SerDeException("Timestamp column (' " + DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN + - "') not specified in create table; list of columns is : " + - properties.getProperty(serdeConstants.LIST_COLUMNS)); - } - columnTypes.addAll(Lists.transform( - Lists.transform(Utilities.getColumnTypes(properties), type -> TypeInfoFactory.getPrimitiveTypeInfo(type)), - e -> e instanceof TimestampLocalTZTypeInfo ?
tsTZTypeInfo : e - )); - inspectors.addAll(Lists.transform(columnTypes, - (Function<PrimitiveTypeInfo, ObjectInspector>) type -> PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(type) - )); - columns = columnNames.toArray(new String[columnNames.size()]); - types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]); - inspector = ObjectInspectorFactory - .getStandardStructObjectInspector(columnNames, inspectors); + // CTAS case + initFromProperties(properties); } else { - String dataSource = properties.getProperty(Constants.DRUID_DATA_SOURCE); - if (dataSource == null) { - throw new SerDeException("Druid data source not specified; use " + - Constants.DRUID_DATA_SOURCE + " in table properties"); - } - SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder(); - builder.dataSource(dataSource); - builder.merge(true); - builder.analysisTypes(); - SegmentMetadataQuery query = builder.build(); - - // Execute query in Druid - String address = HiveConf.getVar(configuration, - HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS - ); - if (org.apache.commons.lang3.StringUtils.isEmpty(address)) { - throw new SerDeException("Druid broker address not specified in configuration"); - } - - // Infer schema - SegmentAnalysis schemaInfo; - try { - schemaInfo = submitMetadataRequest(address, query); - } catch (IOException e) { - throw new SerDeException(e); - } - for (Entry<String, ColumnAnalysis> columnInfo : schemaInfo.getColumns().entrySet()) { - if (columnInfo.getKey().equals(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) { - // Special handling for timestamp column - columnNames.add(columnInfo.getKey()); // field name - PrimitiveTypeInfo type = tsTZTypeInfo; // field type - columnTypes.add(type); - inspectors - .add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type)); - continue; - } - columnNames.add(columnInfo.getKey()); // field name - PrimitiveTypeInfo type = DruidSerDeUtils.convertDruidToHiveType( - columnInfo.getValue().getType()); // field type - columnTypes.add(type instanceof TimestampLocalTZTypeInfo ? tsTZTypeInfo : type); - inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type)); - } - columns = columnNames.toArray(new String[columnNames.size()]); - types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]); - inspector = ObjectInspectorFactory - .getStandardStructObjectInspector(columnNames, inspectors); + // Segment Metadata query that retrieves all columns present in + // the data source (dimensions and metrics).
+ initFromMetaDataQuery(configuration, properties); } - } else { - // Query is specified, we can extract the results schema from the query - Query query; - try { - query = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, Query.class); + } + if (LOG.isDebugEnabled()) { + LOG.debug("DruidSerDe initialized with\n" + "\t columns: " + Arrays.toString(columns) + + "\n\t types: " + Arrays.toString(types)); + } + } - // Extract column names and types (if present) - ImmutableMap.Builder<String, PrimitiveTypeInfo> mapColumnNamesTypes = ImmutableMap.builder(); - if (!org.apache.commons.lang3.StringUtils - .isEmpty(properties.getProperty(serdeConstants.LIST_COLUMNS)) - && !org.apache.commons.lang3.StringUtils - .isEmpty(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES))) { - List<String> propColumnNames = Utilities.getColumnNames(properties); - List<String> propColumnTypes = Utilities.getColumnTypes(properties); - for (int i = 0; i < propColumnNames.size(); i++) { - PrimitiveTypeInfo type = TypeInfoFactory.getPrimitiveTypeInfo(propColumnTypes.get(i)); - if (type instanceof TimestampLocalTZTypeInfo) { - type = tsTZTypeInfo; - } - mapColumnNamesTypes.put(propColumnNames.get(i), type); - } - } + private void initFromMetaDataQuery(final Configuration configuration, final Properties properties) + throws SerDeException { + final List<String> columnNames = new ArrayList<>(); + final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>(); + final List<ObjectInspector> inspectors = new ArrayList<>(); - switch (query.getType()) { - case Query.TIMESERIES: - inferSchema((TimeseriesQuery) query, tsTZTypeInfo, columnNames, columnTypes, - mapColumnNamesTypes.build()); - break; - case Query.TOPN: - inferSchema((TopNQuery) query, tsTZTypeInfo, columnNames, columnTypes, - mapColumnNamesTypes.build()); - break; - case Query.SELECT: - String address = HiveConf.getVar(configuration, - HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS); - if (org.apache.commons.lang3.StringUtils.isEmpty(address)) { - throw new SerDeException("Druid broker address not specified in configuration"); - } - inferSchema((SelectQuery) query, tsTZTypeInfo, columnNames, columnTypes, address, - mapColumnNamesTypes.build()); - break; - case Query.GROUP_BY: - inferSchema((GroupByQuery) query, tsTZTypeInfo, columnNames, columnTypes, - mapColumnNamesTypes.build()); - break; - case Query.SCAN: - String broker = HiveConf.getVar(configuration, - HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS); - if (org.apache.commons.lang3.StringUtils.isEmpty(broker)) { - throw new SerDeException("Druid broker address not specified in configuration"); - } - inferSchema((ScanQuery) query, tsTZTypeInfo, columnNames, columnTypes, broker, - mapColumnNamesTypes.build()); - break; - default: - throw new SerDeException("Not supported Druid query"); - } - } catch (Exception e) { - throw new SerDeException(e); - } + String dataSource = properties.getProperty(Constants.DRUID_DATA_SOURCE); + if (dataSource == null) { + throw new SerDeException("Druid data source not specified; use " + + Constants.DRUID_DATA_SOURCE + " in table properties"); + } + SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder(); + builder.dataSource(dataSource); + builder.merge(true); + builder.analysisTypes(); + SegmentMetadataQuery query = builder.build(); - columns = new String[columnNames.size()]; - types = new PrimitiveTypeInfo[columnNames.size()]; - for (int i = 0; i < columnTypes.size(); ++i) { - columns[i] = columnNames.get(i); - types[i] = columnTypes.get(i); -
inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(types[i])); + // Execute query in Druid + String address = HiveConf.getVar(configuration, + HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS + ); + if (org.apache.commons.lang3.StringUtils.isEmpty(address)) { + throw new SerDeException("Druid broker address not specified in configuration"); + } + // Infer schema + SegmentAnalysis schemaInfo; + try { + schemaInfo = submitMetadataRequest(address, query); + } catch (IOException e) { + throw new SerDeException(e); + } + for (Entry<String, ColumnAnalysis> columnInfo : schemaInfo.getColumns().entrySet()) { + if (columnInfo.getKey().equals(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) { + // Special handling for timestamp column + columnNames.add(columnInfo.getKey()); // field name + PrimitiveTypeInfo type = tsTZTypeInfo; // field type + columnTypes.add(type); + inspectors + .add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type)); + continue; } - inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors); + columnNames.add(columnInfo.getKey()); // field name + PrimitiveTypeInfo type = DruidSerDeUtils.convertDruidToHiveType( + columnInfo.getValue().getType()); // field type + columnTypes.add(type instanceof TimestampLocalTZTypeInfo ? tsTZTypeInfo : type); + inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type)); } + columns = columnNames.toArray(new String[columnNames.size()]); + types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]); + inspector = ObjectInspectorFactory + .getStandardStructObjectInspector(columnNames, inspectors); + } - if (LOG.isDebugEnabled()) { - LOG.debug("DruidSerDe initialized with\n" - + "\t columns: " + columnNames - + "\n\t types: " + columnTypes); + private void initFromProperties(final Properties properties) + throws SerDeException { + final List<ObjectInspector> inspectors = new ArrayList<>(); + final List<String> columnNames = new ArrayList<>(); + final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>(); + + columnNames.addAll(Utilities.getColumnNames(properties)); + if (!columnNames.contains(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) { + throw new SerDeException("Timestamp column (' " + DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN + + "') not specified in create table; list of columns is : " + + properties.getProperty(serdeConstants.LIST_COLUMNS)); + } + columnTypes.addAll(Lists.transform( + Lists.transform(Utilities.getColumnTypes(properties), type -> TypeInfoFactory.getPrimitiveTypeInfo(type)), + e -> e instanceof TimestampLocalTZTypeInfo ?
tsTZTypeInfo : e + )); + inspectors.addAll(Lists.transform(columnTypes, + (Function<PrimitiveTypeInfo, ObjectInspector>) type -> PrimitiveObjectInspectorFactory + .getPrimitiveWritableObjectInspector(type) + )); + columns = columnNames.toArray(new String[columnNames.size()]); + types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]); + inspector = ObjectInspectorFactory + .getStandardStructObjectInspector(columnNames, inspectors); + } + + private void initFromDruidQueryPlan(Properties properties, String druidQuery) { + Preconditions.checkNotNull(druidQuery, "Druid query is null"); + final List<ObjectInspector> inspectors = new ArrayList<>(); + final List<String> columnNames; + final List<PrimitiveTypeInfo> columnTypes; + final String fieldNamesProperty = + Preconditions.checkNotNull(properties.getProperty(Constants.DRUID_QUERY_FIELD_NAMES, null)); + final String fieldTypesProperty = + Preconditions.checkNotNull(properties.getProperty(Constants.DRUID_QUERY_FIELD_TYPES, null)); + if (fieldNamesProperty.isEmpty()) { + // This might seem counterintuitive, but a query such as + // SELECT YEAR(Calcs.date0) AS yr_date0_ok FROM druid_tableau.calcs Calcs WHERE (YEAR(Calcs.date0) IS NULL) LIMIT 1 + // is planned so that only the filter is pushed down and the projection of NULL is kept as a Hive project, hence the empty column list + columnNames = Collections.emptyList(); + columnTypes = Collections.emptyList(); + } else { + columnNames = + Arrays.stream(fieldNamesProperty.trim().split(",")).collect(Collectors.toList()); + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(fieldTypesProperty).stream() + .map(e -> TypeInfoFactory.getPrimitiveTypeInfo(e.getTypeName())) + .map(primitiveTypeInfo -> { + if (primitiveTypeInfo instanceof TimestampLocalTZTypeInfo) { + return tsTZTypeInfo; + } + return primitiveTypeInfo; + }).collect(Collectors.toList()); } + columns = new String[columnNames.size()]; + types = new PrimitiveTypeInfo[columnNames.size()]; + for (int i = 0; i < columnTypes.size(); ++i) { + columns[i] = columnNames.get(i); + types[i] = columnTypes.get(i); + inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(types[i])); + } + inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors); } /* Submits the request and returns */ @@ -315,178 +294,6 @@ protected SegmentAnalysis submitMetadataRequest(String address, SegmentMetadataQ return resultsList.get(0); } - /* Timeseries query */ - private void inferSchema(TimeseriesQuery query, TimestampLocalTZTypeInfo timeColumnTypeInfo, - List<String> columnNames, List<PrimitiveTypeInfo> columnTypes, - Map<String, PrimitiveTypeInfo> mapColumnNamesTypes) { - // Timestamp column - columnNames.add(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN); - columnTypes.add(timeColumnTypeInfo); - // Aggregator columns - for (AggregatorFactory af : query.getAggregatorSpecs()) { - columnNames.add(af.getName()); - PrimitiveTypeInfo typeInfo = mapColumnNamesTypes.get(af.getName()); - if (typeInfo != null) { - // If datasource was created by Hive, we consider Hive type - columnTypes.add(typeInfo); - } else { - columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName())); - } - } - // Post-aggregator columns - // TODO: Currently Calcite only infers avg for post-aggregate, - // but once we recognize other functions, we will need to infer - // different types for post-aggregation functions - for (PostAggregator pa : query.getPostAggregatorSpecs()) { - columnNames.add(pa.getName()); - columnTypes.add(TypeInfoFactory.floatTypeInfo); - } - } - - /* TopN query */ - private void inferSchema(TopNQuery query,
TimestampLocalTZTypeInfo timeColumnTypeInfo, - List<String> columnNames, List<PrimitiveTypeInfo> columnTypes, - Map<String, PrimitiveTypeInfo> mapColumnNamesTypes) { - // Timestamp column - columnNames.add(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN); - columnTypes.add(timeColumnTypeInfo); - // Dimension column - columnNames.add(query.getDimensionSpec().getOutputName()); - columnTypes.add(TypeInfoFactory.stringTypeInfo); - // Aggregator columns - for (AggregatorFactory af : query.getAggregatorSpecs()) { - columnNames.add(af.getName()); - PrimitiveTypeInfo typeInfo = mapColumnNamesTypes.get(af.getName()); - if (typeInfo != null) { - // If datasource was created by Hive, we consider Hive type - columnTypes.add(typeInfo); - } else { - columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName())); - } - } - // Post-aggregator columns - // TODO: Currently Calcite only infers avg for post-aggregate, - // but once we recognize other functions, we will need to infer - // different types for post-aggregation functions - for (PostAggregator pa : query.getPostAggregatorSpecs()) { - columnNames.add(pa.getName()); - columnTypes.add(TypeInfoFactory.floatTypeInfo); - } - } - - /* Select query */ - private void inferSchema(SelectQuery query, TimestampLocalTZTypeInfo timeColumnTypeInfo, - List<String> columnNames, List<PrimitiveTypeInfo> columnTypes, - String address, Map<String, PrimitiveTypeInfo> mapColumnNamesTypes) - throws SerDeException { - // Timestamp column - columnNames.add(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN); - columnTypes.add(timeColumnTypeInfo); - // Dimension columns - for (DimensionSpec ds : query.getDimensions()) { - columnNames.add(ds.getOutputName()); - columnTypes.add(TypeInfoFactory.stringTypeInfo); - } - // The type for metric columns is not explicit in the query, thus in this case - // we need to emit a metadata query to know their type - SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder(); - builder.dataSource(query.getDataSource()); - builder.merge(true); - builder.analysisTypes(); - SegmentMetadataQuery metadataQuery = builder.build(); - // Execute query in Druid - SegmentAnalysis schemaInfo; - try { - schemaInfo = submitMetadataRequest(address, metadataQuery); - } catch (IOException e) { - throw new SerDeException(e); - } - if (schemaInfo == null) { - throw new SerDeException("Connected to Druid but could not retrieve datasource information"); - } - for (String metric : query.getMetrics()) { - columnNames.add(metric); - PrimitiveTypeInfo typeInfo = mapColumnNamesTypes.get(metric); - if (typeInfo != null) { - // If datasource was created by Hive, we consider Hive type - columnTypes.add(typeInfo); - } else { - columnTypes.add(DruidSerDeUtils.convertDruidToHiveType( - schemaInfo.getColumns().get(metric).getType())); - } - } - } - - /* Scan query */ - private void inferSchema(ScanQuery query, TimestampLocalTZTypeInfo timeColumnTypeInfo, - List<String> columnNames, List<PrimitiveTypeInfo> columnTypes, - String address, Map<String, PrimitiveTypeInfo> mapColumnNamesTypes) - throws SerDeException { - // The type for metric columns is not explicit in the query, thus in this case - // we need to emit a metadata query to know their type - SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder(); - builder.dataSource(query.getDataSource()); - builder.merge(true); - builder.analysisTypes(); - SegmentMetadataQuery metadataQuery = builder.build(); - // Execute query in Druid - SegmentAnalysis schemaInfo; - try { - schemaInfo = submitMetadataRequest(address, metadataQuery); - } catch (IOException e) { - throw new SerDeException(e); - } - if (schemaInfo == null) { - throw new
SerDeException("Connected to Druid but could not retrieve datasource information"); - } - for (String column : query.getColumns()) { - columnNames.add(column); - PrimitiveTypeInfo typeInfo = mapColumnNamesTypes.get(column); - if (typeInfo != null) { - // If datasource was created by Hive, we consider Hive type - columnTypes.add(typeInfo); - } else { - ColumnAnalysis columnAnalysis = schemaInfo.getColumns().get(column); - // If column is absent from Druid consider it as a dimension with type string. - String type = columnAnalysis == null ? DruidSerDeUtils.STRING_TYPE : columnAnalysis.getType(); - columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(type)); - } - } - } - - /* GroupBy query */ - private void inferSchema(GroupByQuery query, TimestampLocalTZTypeInfo timeColumnTypeInfo, - List<String> columnNames, List<PrimitiveTypeInfo> columnTypes, - Map<String, PrimitiveTypeInfo> mapColumnNamesTypes) { - // Timestamp column - columnNames.add(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN); - columnTypes.add(timeColumnTypeInfo); - // Dimension columns - for (DimensionSpec ds : query.getDimensions()) { - columnNames.add(ds.getOutputName()); - columnTypes.add(DruidSerDeUtils.extractTypeFromDimension(ds)); - } - // Aggregator columns - for (AggregatorFactory af : query.getAggregatorSpecs()) { - columnNames.add(af.getName()); - PrimitiveTypeInfo typeInfo = mapColumnNamesTypes.get(af.getName()); - if (typeInfo != null) { - // If datasource was created by Hive, we consider Hive type - columnTypes.add(typeInfo); - } else { - columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName())); - } - } - // Post-aggregator columns - // TODO: Currently Calcite only infers avg for post-aggregate, - // but once we recognize other functions, we will need to infer - // different types for post-aggregation functions - for (PostAggregator pa : query.getPostAggregatorSpecs()) { - columnNames.add(pa.getName()); - columnTypes.add(TypeInfoFactory.floatTypeInfo); - } - } - @Override public Class<? extends Writable> getSerializedClass() { return DruidWritable.class; @@ -505,7 +312,7 @@ public Writable serialize(Object o, ObjectInspector objectInspector) throws SerD List<? extends StructField> fields = soi.getAllStructFieldRefs(); List<Object> values = soi.getStructFieldsDataAsList(o); // We deserialize the result - Map<String, Object> value = new HashMap<>(); + final Map<String, Object> value = new HashMap<>(); for (int i = 0; i < columns.length; i++) { if (values.get(i) == null) { // null, we just add it @@ -514,6 +321,11 @@ public Writable serialize(Object o, ObjectInspector objectInspector) throws SerD } final Object res; switch (types[i].getPrimitiveCategory()) { + case TIMESTAMP: + res = ((TimestampObjectInspector) fields.get(i).getFieldObjectInspector()) + .getPrimitiveJavaObject( + values.get(i)).getTime(); + break; case TIMESTAMPLOCALTZ: res = ((TimestampLocalTZObjectInspector) fields.get(i).getFieldObjectInspector()) .getPrimitiveJavaObject(values.get(i)).getZonedDateTime().toInstant().toEpochMilli(); @@ -598,8 +410,8 @@ public SerDeStats getSerDeStats() { @Override public Object deserialize(Writable writable) throws SerDeException { - DruidWritable input = (DruidWritable) writable; - List<Object> output = Lists.newArrayListWithExpectedSize(columns.length); + final DruidWritable input = (DruidWritable) writable; + final List<Object> output = Lists.newArrayListWithExpectedSize(columns.length); for (int i = 0; i < columns.length; i++) { final Object value = input.getValue().get(columns[i]); if (value == null) { @@ -607,6 +419,12 @@ public Object deserialize(Writable writable) throws SerDeException { continue; } switch
(types[i].getPrimitiveCategory()) { + case TIMESTAMP: + output.add(new TimestampWritable(Timestamp.valueOf(ZonedDateTime + .ofInstant(Instant.ofEpochMilli(((Number) value).longValue()), + tsTZTypeInfo.timeZone() + ).format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))))); + break; case TIMESTAMPLOCALTZ: output.add( new TimestampLocalTZWritable( @@ -664,7 +482,7 @@ public Object deserialize(Writable writable) throws SerDeException { } @Override - public ObjectInspector getObjectInspector() throws SerDeException { + public ObjectInspector getObjectInspector() { return inspector; } diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDeUtils.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDeUtils.java index 673420b0df..1c34e418cb 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDeUtils.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDeUtils.java @@ -65,22 +65,4 @@ public static PrimitiveTypeInfo convertDruidToHiveType(String typeName) { } } - /* Extract type from dimension spec. It returns TIMESTAMP if it is a FLOOR, - * INTEGER if it is a EXTRACT, or STRING otherwise. */ - public static PrimitiveTypeInfo extractTypeFromDimension(DimensionSpec ds) { - if (ds instanceof ExtractionDimensionSpec) { - ExtractionDimensionSpec eds = (ExtractionDimensionSpec) ds; - TimeFormatExtractionFn tfe = (TimeFormatExtractionFn) eds.getExtractionFn(); - if (tfe.getFormat() == null || tfe.getFormat().equals(ISO_TIME_FORMAT)) { - // Timestamp (null or default used by FLOOR) - return TypeInfoFactory.timestampLocalTZTypeInfo; - } else { - // EXTRACT from timestamp - return TypeInfoFactory.intTypeInfo; - } - } - // Default - return TypeInfoFactory.stringTypeInfo; - } - } diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTimeseriesQueryRecordReader.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTimeseriesQueryRecordReader.java index 571deec07a..7f103c84d4 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTimeseriesQueryRecordReader.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTimeseriesQueryRecordReader.java @@ -64,7 +64,7 @@ public NullWritable getCurrentKey() throws IOException, InterruptedException { public DruidWritable getCurrentValue() throws IOException, InterruptedException { // Create new value DruidWritable value = new DruidWritable(); - value.getValue().put(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis()); + value.getValue().put("timestamp", current.getTimestamp().getMillis()); value.getValue().putAll(current.getValue().getBaseObject()); return value; } @@ -74,7 +74,7 @@ public boolean next(NullWritable key, DruidWritable value) { if (nextKeyValue()) { // Update value value.getValue().clear(); - value.getValue().put(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis()); + value.getValue().put("timestamp", current.getTimestamp().getMillis()); value.getValue().putAll(current.getValue().getBaseObject()); return true; } diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java index 45c22b0b9f..d082e919ca 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java
@@ -76,7 +76,7 @@ public NullWritable getCurrentKey() throws IOException, InterruptedException { public DruidWritable getCurrentValue() throws IOException, InterruptedException { // Create new value DruidWritable value = new DruidWritable(); - value.getValue().put(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, + value.getValue().put("timestamp", current.getTimestamp().getMillis() ); if (values.hasNext()) { @@ -91,7 +91,7 @@ public boolean next(NullWritable key, DruidWritable value) { if (nextKeyValue()) { // Update value value.getValue().clear(); - value.getValue().put(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, + value.getValue().put("timestamp", current.getTimestamp().getMillis() ); if (values.hasNext()) { diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index f513fe5ff7..a3b386465d 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -1668,6 +1668,9 @@ spark.perf.disabled.query.files=query14.q,\ druid.query.files=druidmini_test1.q,\ druidmini_test_insert.q,\ druidmini_mv.q,\ - druid_timestamptz.q, \ - druidmini_dynamic_partition.q + druid_timestamptz.q,\ + druidmini_dynamic_partition.q,\ + druidmini_expressions.q,\ + druidmini_extractTime.q,\ + druidmini_floorTime.q diff --git pom.xml pom.xml index 5be30f6696..841d22753f 100644 --- pom.xml +++ pom.xml @@ -121,7 +121,7 @@ 1.10.0 1.7.7 0.8.0.RELEASE - 1.15.0 + 1.16.0 4.2.4 4.1.17 4.1.19 @@ -260,6 +260,19 @@ false + + calcite + calcite repository + https://repository.apache.org/content/repositories/orgapachecalcite-1042 + default + + true + warn + + + false + + diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index cd01094528..3f73fd7fcc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -30,6 +30,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.collect.ImmutableMap; import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.adapter.druid.DruidSchema; import org.apache.calcite.adapter.druid.DruidTable; @@ -385,7 +386,7 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false); tableRel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE), - optTable, druidTable, ImmutableList.of(scan)); + optTable, druidTable, ImmutableList.of(scan), ImmutableMap.of()); } else { // Build Hive Table Scan Rel RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveDruidRules.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveDruidRules.java index ba8e98e9d4..7d2b9dc59a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveDruidRules.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveDruidRules.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import org.apache.calcite.adapter.druid.DruidRules.DruidHavingFilterRule; import 
org.apache.calcite.adapter.druid.DruidRules.DruidAggregateFilterTransposeRule; import org.apache.calcite.adapter.druid.DruidRules.DruidAggregateProjectRule; import org.apache.calcite.adapter.druid.DruidRules.DruidAggregateRule; @@ -71,4 +72,7 @@ public static final DruidPostAggregationProjectRule POST_AGGREGATION_PROJECT = new DruidPostAggregationProjectRule(HiveRelFactories.HIVE_BUILDER); + + public static final DruidHavingFilterRule HAVING_FILTER_RULE = + new DruidHavingFilterRule(HiveRelFactories.HIVE_BUILDER); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java index ce5e354ced..df9c1802c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java @@ -31,21 +31,27 @@ public class HiveMaterializedViewRule { public static final MaterializedViewProjectFilterRule INSTANCE_PROJECT_FILTER = - new MaterializedViewProjectFilterRule(HiveRelFactories.HIVE_BUILDER, true); + new MaterializedViewProjectFilterRule(HiveRelFactories.HIVE_BUILDER, true, + null, false); public static final MaterializedViewOnlyFilterRule INSTANCE_FILTER = - new MaterializedViewOnlyFilterRule(HiveRelFactories.HIVE_BUILDER, true); + new MaterializedViewOnlyFilterRule(HiveRelFactories.HIVE_BUILDER, true, + null, false); public static final MaterializedViewProjectJoinRule INSTANCE_PROJECT_JOIN = - new MaterializedViewProjectJoinRule(HiveRelFactories.HIVE_BUILDER, true); + new MaterializedViewProjectJoinRule(HiveRelFactories.HIVE_BUILDER, true, + null, false); public static final MaterializedViewOnlyJoinRule INSTANCE_JOIN = - new MaterializedViewOnlyJoinRule(HiveRelFactories.HIVE_BUILDER, true); + new MaterializedViewOnlyJoinRule(HiveRelFactories.HIVE_BUILDER, true, + null, false); public static final MaterializedViewProjectAggregateRule INSTANCE_PROJECT_AGGREGATE = - new MaterializedViewProjectAggregateRule(HiveRelFactories.HIVE_BUILDER, true); + new MaterializedViewProjectAggregateRule(HiveRelFactories.HIVE_BUILDER, true, + null); public static final MaterializedViewOnlyAggregateRule INSTANCE_AGGREGATE = - new MaterializedViewOnlyAggregateRule(HiveRelFactories.HIVE_BUILDER, true); + new MaterializedViewOnlyAggregateRule(HiveRelFactories.HIVE_BUILDER, true, + null); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java index 4d12c8c0dc..79fcfcf0a6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.translator; import java.math.BigDecimal; +import java.util.stream.Collectors; import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.rel.RelNode; @@ -74,12 +75,28 @@ public static ASTNode table(RelNode scan) { ASTBuilder propList = ASTBuilder.construct(HiveParser.TOK_TABLEPROPLIST, "TOK_TABLEPROPLIST"); if (scan instanceof DruidQuery) { - // Pass possible query to Druid + //Passing query spec, column names and column types to be used as part of Hive Physical execution DruidQuery dq = (DruidQuery) scan; + //Adding Query specs to be used by 
org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY") .add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_JSON + "\"") .add(HiveParser.StringLiteral, "\"" + SemanticAnalyzer.escapeSQLString( dq.getQueryString()) + "\"")); + // Adding column names used later by org.apache.hadoop.hive.druid.serde.DruidSerDe + propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY") + .add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_FIELD_NAMES + "\"") + .add(HiveParser.StringLiteral, + "\"" + dq.getRowType().getFieldNames().stream().map(Object::toString) + .collect(Collectors.joining(",")) + "\"" + )); + // Adding column types used later by org.apache.hadoop.hive.druid.serde.DruidSerDe + propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY") + .add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_FIELD_TYPES + "\"") + .add(HiveParser.StringLiteral, + "\"" + dq.getRowType().getFieldList().stream() + .map(e -> TypeConverter.convert(e.getType()).getTypeName()) + .collect(Collectors.joining(",")) + "\"" + )); propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY") .add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_TYPE + "\"") .add(HiveParser.StringLiteral, "\"" + dq.getQueryType().getQueryName() + "\"")); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 3520d90fa8..448463fe74 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -44,14 +44,18 @@ import java.util.concurrent.atomic.AtomicInteger; import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.TreeVisitor; import org.antlr.runtime.tree.TreeVisitorAction; +import org.apache.calcite.adapter.druid.DirectOperatorConversion; import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.adapter.druid.DruidSchema; import org.apache.calcite.adapter.druid.DruidTable; +import org.apache.calcite.adapter.druid.ExtractOperatorConversion; +import org.apache.calcite.adapter.druid.FloorOperatorConversion; import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.config.CalciteConnectionConfigImpl; import org.apache.calcite.config.CalciteConnectionProperty; @@ -106,11 +110,13 @@ import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlLiteral; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlWindow; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.SqlTypeName; @@ -162,8 +168,11 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; @@ -274,6 +283,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; + public class CalcitePlanner extends SemanticAnalyzer { private final AtomicInteger noColsMissingStats = new AtomicInteger(0); @@ -1688,6 +1698,7 @@ private RelNode copyNodeScan(RelNode scan) { HiveDruidRules.POST_AGGREGATION_PROJECT, HiveDruidRules.FILTER_AGGREGATE_TRANSPOSE, HiveDruidRules.FILTER_PROJECT_TRANSPOSE, + HiveDruidRules.HAVING_FILTER_RULE, HiveDruidRules.SORT_PROJECT_TRANSPOSE, HiveDruidRules.SORT, HiveDruidRules.PROJECT_SORT_TRANSPOSE @@ -2471,6 +2482,8 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory(); List<RelDataType> druidColTypes = new ArrayList<>(); List<String> druidColNames = new ArrayList<>(); + //@TODO FIX this, we actually do not need this anymore; + // in addition, Druid now allows numeric dimensions, so this check is not accurate for (RelDataTypeField field : rowType.getFieldList()) { if (DruidTable.DEFAULT_TIMESTAMP_COLUMN.equals(field.getName())) { // Druid's time column is always not null. @@ -2503,8 +2516,9 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP), qb.isInsideView() || qb.getAliasInsideView().contains(tableAlias.toLowerCase())); + // Create the Druid query with the default operator conversion map tableRel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE), - optTable, druidTable, ImmutableList.of(scan)); + optTable, druidTable, ImmutableList.of(scan), DruidSqlOperatorConverter.getDefaultMap()); } else { // Build row type from field RelDataType rowType = inferNotNullableColumns(tabMetaData, TypeConverter.getType(cluster, rr, null)); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java new file mode 100644 index 0000000000..2438a86199 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.parse; + +import com.google.common.base.Function; +import com.google.common.collect.Maps; +import org.apache.calcite.adapter.druid.DirectOperatorConversion; +import org.apache.calcite.adapter.druid.DruidExpressions; +import org.apache.calcite.adapter.druid.DruidQuery; +import org.apache.calcite.adapter.druid.ExtractOperatorConversion; +import org.apache.calcite.adapter.druid.FloorOperatorConversion; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; + +import javax.annotation.Nullable; +import java.util.HashMap; +import java.util.Map; + +/** + * Contains custom Druid SQL operator converter classes: either + * Hive-specific OperatorConversion logic that cannot be part of Calcite, or + * temporary OperatorConversion logic that has not been released by Calcite yet. + */ +public class DruidSqlOperatorConverter { + private DruidSqlOperatorConverter() { + } + private static Map<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter> druidOperatorMap = null; + + public static final Map<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter> getDefaultMap() { + if (druidOperatorMap == null) { + druidOperatorMap = + new HashMap<>(); + DruidQuery.DEFAULT_OPERATORS_LIST.stream() + .forEach(op -> druidOperatorMap.put(op.calciteOperator(), op)); + + // Override Hive-specific operators + druidOperatorMap.putAll(Maps.asMap(HiveFloorDate.ALL_FUNCTIONS, + (Function<SqlFunction, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>) input -> new FloorOperatorConversion() + )); + druidOperatorMap.putAll(Maps.asMap(HiveExtractDate.ALL_FUNCTIONS, + (Function<SqlFunction, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>) input -> new ExtractOperatorConversion() + )); + druidOperatorMap + .put(HiveConcat.INSTANCE, new DirectOperatorConversion(HiveConcat.INSTANCE, "concat")); + druidOperatorMap.put(SqlStdOperatorTable.SUBSTRING, + new DruidSqlOperatorConverter.DruidSubstringOperatorConversion() + ); + } + return druidOperatorMap; + } + + //@TODO remove this when it is fixed in Calcite, see https://issues.apache.org/jira/browse/HIVE-18996 + public static class DruidSubstringOperatorConversion extends org.apache.calcite.adapter.druid.SubstringOperatorConversion { + @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, + DruidQuery query + ) { + final RexCall call = (RexCall) rexNode; + final String arg = DruidExpressions.toDruidExpression( + call.getOperands().get(0), rowType, query); + if (arg == null) { + return null; + } + + final String indexStart; + final String length; + // SQL is 1-indexed, Druid is 0-indexed.
+ if (!call.getOperands().get(1).isA(SqlKind.LITERAL)) { + final String indexExp = DruidExpressions.toDruidExpression( + call.getOperands().get(1), rowType, query); + if (indexExp == null) { + return null; + } + indexStart = DruidQuery.format("(%s - 1)", indexExp); + } else { + final int index = RexLiteral.intValue(call.getOperands().get(1)) - 1; + indexStart = DruidExpressions.numberLiteral(index); + } + + if (call.getOperands().size() > 2) { + //case substring from index with length + if (!call.getOperands().get(2).isA(SqlKind.LITERAL)) { + length = DruidExpressions.toDruidExpression( + call.getOperands().get(2), rowType, query); + if (length == null) { + return null; + } + } else { + length = DruidExpressions.numberLiteral(RexLiteral.intValue(call.getOperands().get(2))); + } + + } else { + //case substring from index to the end + length = DruidExpressions.numberLiteral(-1); + } + return DruidQuery.format("substring(%s, %s, %s)", + arg, + indexStart, + length); + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java index 38dfbf4b5d..e8dd57228f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java @@ -85,8 +85,8 @@ public void testRuleFiredOnlyOnce() { planner.findBestExp(); - // Matches 3 times: 2 times the original node, 1 time the new node created by the rule - assertEquals(3, DummyRule.INSTANCE.numberMatches); + // Matches 2 times: one time the original node, one time the new node created by the rule + assertEquals(2, DummyRule.INSTANCE.numberMatches); // It is fired only once: on the original node assertEquals(1, DummyRule.INSTANCE.numberOnMatch); } diff --git ql/src/test/queries/clientpositive/druid_timestamptz.q ql/src/test/queries/clientpositive/druid_timestamptz.q index 1e963f98d4..63c6e4e211 100644 --- ql/src/test/queries/clientpositive/druid_timestamptz.q +++ ql/src/test/queries/clientpositive/druid_timestamptz.q @@ -1,5 +1,6 @@ set hive.fetch.task.conversion=more; + drop table tstz1; create table tstz1(`__time` timestamp with local time zone, n string, v integer) @@ -9,12 +10,42 @@ TBLPROPERTIES ("druid.segment.granularity" = "HOUR"); insert into table tstz1 values(cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone), 'Bill', 10); +EXPLAIN select `__time` from tstz1; select `__time` from tstz1; + +EXPLAIN select cast(`__time` as timestamp) from tstz1; select cast(`__time` as timestamp) from tstz1; + +EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone); select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone); -set time zone UTC; +EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1; +SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1; + +EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1; +SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1; + +set time zone UTC; +EXPLAIN select `__time` from tstz1; select `__time` from tstz1; +EXPLAIN select cast(`__time` as timestamp) from tstz1; select cast(`__time` as timestamp) from tstz1; +EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 
America/Los_Angeles' as timestamp with local time zone); select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone); + +-- THIS is failing; explore why +--EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` = cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone); +--select cast(`__time` as timestamp) from tstz1 where `__time` = cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone); + +EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 20:26:34' as timestamp); +select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 20:26:34' as timestamp); + +EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) AND `__time` <= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone); +select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) AND `__time` <= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone); + +EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1; +SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1; + +EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1; +SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1; diff --git ql/src/test/queries/clientpositive/druidmini_dynamic_partition.q ql/src/test/queries/clientpositive/druidmini_dynamic_partition.q index 2552717afe..9d20123f40 100644 --- ql/src/test/queries/clientpositive/druidmini_dynamic_partition.q +++ ql/src/test/queries/clientpositive/druidmini_dynamic_partition.q @@ -62,8 +62,13 @@ SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, cboolean1, cboolean2 FROM alltypesorc where ctimestamp1 IS NOT NULL; +-- @FIXME https://issues.apache.org/jira/browse/HIVE-19011 +-- SELECT sum(cint), max(cbigint), sum(cbigint), max(cint) FROM druid_partitioned_table; +-- SELECT sum(cint), max(cbigint), sum(cbigint), max(cint) FROM druid_partitioned_table_0; -SELECT sum(cfloat) FROM druid_partitioned_table ; + + +SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table; +SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table_0; SELECT floor_hour(cast(`ctimestamp1` as timestamp with local time zone)) as `__time`, cstring1, @@ -106,8 +111,10 @@ SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`, cboolean1, cboolean2 FROM alltypesorc where ctimestamp2 IS NOT NULL; +-- @FIXME https://issues.apache.org/jira/browse/HIVE-19011 +-- SELECT sum(cint), max(cbigint), sum(cbigint), max(cint) FROM druid_partitioned_table; -SELECT sum(cfloat) FROM druid_partitioned_table ; +SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table; EXPLAIN INSERT OVERWRITE TABLE druid_partitioned_table SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, @@ -138,8 +145,11 @@ INSERT OVERWRITE TABLE druid_partitioned_table cboolean2 FROM alltypesorc where ctimestamp1 IS NOT NULL; - SELECT sum(cfloat) FROM druid_partitioned_table ; +-- @FIXME https://issues.apache.org/jira/browse/HIVE-19011 +--SELECT sum(cint), max(cbigint), sum(cbigint), max(cint) FROM druid_partitioned_table ; +--SELECT sum(cint), max(cbigint), sum(cbigint), max(cint) FROM druid_partitioned_table_0; +SELECT sum(cint), sum(cbigint)
+SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table ; set hive.druid.indexer.partition.size.max=10; @@ -163,8 +173,13 @@ CREATE TABLE druid_max_size_partition cboolean2 FROM alltypesorc where ctimestamp1 IS NOT NULL; -SELECT sum(cfloat) FROM druid_max_size_partition ; +SELECT sum(cint), sum(cbigint) FROM druid_max_size_partition ; + +-- @FIXME https://issues.apache.org/jira/browse/HIVE-19011 +--SELECT sum(cint), max(cbigint), sum(cbigint), max(cint) FROM druid_max_size_partition ; +--SELECT sum(cint), max(cbigint), sum(cbigint), max(cint) FROM druid_partitioned_table_0; +--SELECT sum(cint), max(cbigint), sum(cbigint), max(cint) FROM druid_partitioned_table ; - DROP TABLE druid_partitioned_table_0; - DROP TABLE druid_partitioned_table; - DROP TABLE druid_max_size_partition; \ No newline at end of file +DROP TABLE druid_partitioned_table_0; +DROP TABLE druid_partitioned_table; +DROP TABLE druid_max_size_partition; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/druidmini_expressions.q ql/src/test/queries/clientpositive/druidmini_expressions.q new file mode 100644 index 0000000000..50d9cf4fd1 --- /dev/null +++ ql/src/test/queries/clientpositive/druidmini_expressions.q @@ -0,0 +1,52 @@ +CREATE TABLE druid_table +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL; + + -- MATH AND STRING functions + +SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3; + +SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10; + +SELECT count(*) FROM druid_table WHERE power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3; + +SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000 OR ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1; + +SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000; + +SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1; + +SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE SIN(cdouble) > 1; + +SELECT cstring1 || '_'|| cstring2, substring(cstring2, 2, 3) as concat , upper(cstring2), lower(cstring1), SUM(cdouble) as s FROM druid_table WHERE cstring1 IS NOT NULL AND cstring2 IS NOT NULL AND cstring2 like 'Y%' + GROUP BY cstring1 || '_'|| cstring2, substring(cstring2, 2, 3), upper(cstring2), lower(cstring1) ORDER BY concat DESC LIMIT 10; +
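Each positive query in druidmini_expressions.q is paired with an EXPLAIN below, so the golden output records whether Calcite pushed the expression into Druid: on success the TableScan carries druid.query.json and druid.query.type properties, the math and string predicates appear as Druid "expression" filters inside that JSON, and anything that cannot be translated stays behind as a residual Hive Filter Operator (as in the expected plan for character_length further down). A minimal sketch of the kind of check these EXPLAINs encode, assuming the druid_table created above:

-- Expect druid.query.type scan (or timeseries when aggregating) and a filter
-- such as (sqrt(abs("ctinyint")) > 3) inside druid.query.json.
EXPLAIN SELECT count(*) FROM druid_table WHERE SQRT(ABS(ctinyint)) > 3;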
+EXPLAIN SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3; + +EXPLAIN SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) + FROM druid_table WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000 OR ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1; + +EXPLAIN SELECT cstring1 || '_'|| cstring2, substring(cstring2, 2, 3) as concat , upper(cstring2), lower(cstring1), SUM(cdouble) as s FROM druid_table WHERE cstring1 IS NOT NULL AND cstring2 IS NOT NULL AND cstring2 like 'Y%' + GROUP BY cstring1 || '_'|| cstring2, substring(cstring2, 2, 3), upper(cstring2), lower(cstring1) ORDER BY concat DESC LIMIT 10; + + + + +DROP TABLE druid_table; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/druidmini_extractTime.q ql/src/test/queries/clientpositive/druidmini_extractTime.q new file mode 100644 index 0000000000..106fddb1d8 --- /dev/null +++ ql/src/test/queries/clientpositive/druidmini_extractTime.q @@ -0,0 +1,163 @@ +CREATE TABLE druid_table +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL; + + +-- GROUP BY TIME EXTRACT +--SECONDS +SELECT EXTRACT(SECOND from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(SECOND from `__time`); + +EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(SECOND from `__time`); + + +-- MINUTES +SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MINUTE from `__time`); + +EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MINUTE from `__time`); + +-- HOUR +SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(HOUR from `__time`); + +EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(HOUR from `__time`); +
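The same SELECT/EXPLAIN pattern now repeats for DAY, WEEK, MONTH, QUARTER, and YEAR. Judging by the expected plans later in this patch, an EXTRACT over `__time` should compile to a Druid virtual column built from timestamp_extract with the session time zone; a minimal sketch of the expected shape, assuming druid_table and a US/Pacific session zone:

-- Assumed translation (mirroring the druid.query.json in the .q.out files):
--   timestamp_extract("__time", 'HOUR', 'US/Pacific')
-- surfaced as a virtual column named vc rather than a Hive-side expression.
EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table GROUP BY EXTRACT(HOUR from `__time`);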
+-- DAY +SELECT EXTRACT(DAY from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`); + +EXPLAIN SELECT EXTRACT(DAY from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`); + +--WEEK +SELECT EXTRACT(WEEK from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`); + + +EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`); + +--MONTH +SELECT EXTRACT(MONTH from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`); + +EXPLAIN SELECT EXTRACT(MONTH from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`); + +--QUARTER + +SELECT EXTRACT(QUARTER from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`); + +EXPLAIN SELECT EXTRACT(QUARTER from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`); + +-- YEAR +SELECT EXTRACT(YEAR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`); + + +EXPLAIN SELECT EXTRACT(YEAR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`); + +-- SELECT WITHOUT GROUP BY + +-- SECOND + +EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1; + +SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1; + +-- MINUTE + +EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE EXTRACT(MINUTE from `__time`) >= 0 LIMIT 2; + +SELECT EXTRACT(MINUTE from `__time`) as minute FROM druid_table + WHERE EXTRACT(MINUTE from `__time`) >= 0 order by minute LIMIT 2; +-- HOUR + +EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1; + +SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1; + +--DAY + +EXPLAIN SELECT EXTRACT(DAY from
`__time`), EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS day_str +FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1; + +SELECT EXTRACT(DAY from `__time`) , EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS day_str +FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1 ; + +-- WEEK + +EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1 +AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1; + +SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1 +AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1 ; + +--MONTH + +EXPLAIN SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table +WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1; + +SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table + WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1; + + +--QUARTER + +EXPLAIN SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4 + AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1; + +SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4 + AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1; + +--YEAR + +EXPLAIN SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) AS year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969 +AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1; + +SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) as year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969 +AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1; + + +DROP TABLE druid_table; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/druidmini_floorTime.q ql/src/test/queries/clientpositive/druidmini_floorTime.q new file mode 100644 index 0000000000..b511a5c08c --- /dev/null +++ ql/src/test/queries/clientpositive/druidmini_floorTime.q @@ -0,0 +1,163 @@ +CREATE TABLE druid_table +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL; + +
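This new file exercises the FLOOR(<timestamp> TO <unit>) path over the Druid time column. Based on the expected plans elsewhere in this patch, a floor over `__time` should become a Druid timestamp_floor expression whose second argument is the ISO-8601 period for the unit; a minimal sketch of the expected shape, assuming the druid_table just created:

-- Assumed translation (mirroring the druid.query.json in the .q.out files):
--   timestamp_floor("__time", 'PT1H', '', '<session time zone>')
-- where 'PT1H' is the ISO-8601 period for HOUR.
EXPLAIN SELECT floor(`__time` to HOUR) FROM druid_table GROUP BY floor(`__time` to HOUR);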
+-- GROUP BY TIME EXTRACT +--SECONDS +SELECT floor(`__time` to SECOND) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to SECOND); + +EXPLAIN SELECT floor(`__time` to SECOND) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to SECOND); + + +-- MINUTES +SELECT floor(`__time` to MINUTE) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to MINUTE); + +EXPLAIN SELECT floor(`__time` to MINUTE) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to MINUTE); + +-- HOUR +SELECT floor(`__time` to HOUR) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to HOUR); + +EXPLAIN SELECT floor(`__time` to HOUR) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to HOUR); + +-- DAY +SELECT EXTRACT(DAY from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`); + +EXPLAIN SELECT EXTRACT(DAY from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`); + +--WEEK +SELECT EXTRACT(WEEK from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`); + + +EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`); + +--MONTH +SELECT EXTRACT(MONTH from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`); + +EXPLAIN SELECT EXTRACT(MONTH from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`); + +--QUARTER + +SELECT EXTRACT(QUARTER from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`); + +EXPLAIN SELECT EXTRACT(QUARTER from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`); +
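Note that from DAY upward this file falls back to EXTRACT rather than floor(), so the timestamp_floor path is only covered for SECOND, MINUTE, and HOUR. A hypothetical floor-based variant of the DAY grouping, which would exercise timestamp_floor with a 'P1D' period, might look like:

-- Not part of this patch; sketch of a floor-based DAY grouping.
SELECT floor(`__time` to DAY) FROM druid_table GROUP BY floor(`__time` to DAY);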
+-- YEAR +SELECT EXTRACT(YEAR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`); + + +EXPLAIN SELECT EXTRACT(YEAR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`); + +-- SELECT WITHOUT GROUP BY + +-- SECOND + +EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1; + +SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1; + +-- MINUTE + +EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE EXTRACT(MINUTE from `__time`) >= 0 LIMIT 2; + +SELECT EXTRACT(MINUTE from `__time`) as minute FROM druid_table + WHERE EXTRACT(MINUTE from `__time`) >= 0 order by minute LIMIT 2; +-- HOUR + +EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1; + +SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1; + +--DAY + +EXPLAIN SELECT EXTRACT(DAY from `__time`), EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS day_str +FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1; + +SELECT EXTRACT(DAY from `__time`) , EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS day_str +FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1 ; + +-- WEEK + +EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1 +AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1; + +SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1 +AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1 ; + +--MONTH + +EXPLAIN SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table +WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1; + +SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table + WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1; + + +--QUARTER + +EXPLAIN SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4 + AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1; + +SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4 + AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1; + +--YEAR + +EXPLAIN SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) AS year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969 +AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1; + +SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) as year_str FROM
druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969 +AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1; + + +DROP TABLE druid_table; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/druidmini_mv.q ql/src/test/queries/clientpositive/druidmini_mv.q index 21653b7985..f5ea76b70c 100644 --- ql/src/test/queries/clientpositive/druidmini_mv.q +++ ql/src/test/queries/clientpositive/druidmini_mv.q @@ -12,22 +12,23 @@ AS SELECT cast(current_timestamp() AS timestamp) AS t, cast(a AS int) AS a, cast(b AS varchar(256)) AS b, + cast(userid AS varchar(256)) AS userid, cast(c AS double) AS c, cast(d AS int) AS d FROM TABLE ( VALUES - (1, 'alfred', 10.30, 2), - (2, 'bob', 3.14, 3), - (2, 'bonnie', 172342.2, 3), - (3, 'calvin', 978.76, 3), - (3, 'charlie', 9.8, 1), - (3, 'charlie', 15.8, 1)) as q (a, b, c, d); + (1, 'alfred', 'alfred', 10.30, 2), + (2, 'bob', 'bob', 3.14, 3), + (2, 'bonnie', 'bonnie', 172342.2, 3), + (3, 'calvin', 'calvin', 978.76, 3), + (3, 'charlie', 'charlie_a', 9.8, 1), + (3, 'charlie', 'charlie_b', 15.8, 1)) as q (a, b, userid, c, d); CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c +SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c, userid FROM cmv_basetable WHERE a = 2; @@ -39,7 +40,7 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS cmv_mat_view2 ENABLE REWRITE STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c +SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c, userid FROM cmv_basetable WHERE a = 3; @@ -70,7 +71,7 @@ SELECT * FROM ( ON table1.a = table2.a); INSERT INTO cmv_basetable VALUES - (cast(current_timestamp() AS timestamp), 3, 'charlie', 15.8, 1); + (cast(current_timestamp() AS timestamp), 3, 'charlie', 'charlie_c', 15.8, 1); -- TODO: CANNOT USE THE VIEW, IT IS OUTDATED EXPLAIN diff --git ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out index 3c3fed7bea..204c35be18 100644 --- ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out +++ ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out @@ -266,7 +266,7 @@ POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE POSTHOOK: Input: default@cmv_mat_view2 POSTHOOK: Output: default@cmv_mat_view2 -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[4][tables = [test_db_materialized_view_create_rewrite.cmv_mat_view2, cmv_basetable]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from ( (select a, c from cmv_basetable where a = 3) table1 @@ -292,28 +292,17 @@ STAGE PLANS: TableScan alias: default.cmv_mat_view2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: decimal(10,2)) - outputColumnNames: _col0 + Reduce Output Operator + sort order: Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: 
NONE - value expressions: _col0 (type: decimal(10,2)) + value expressions: a (type: int), c (type: decimal(10,2)) TableScan alias: cmv_basetable Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((a = 3) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: decimal(10,2)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,2)) + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE + value expressions: a (type: int), c (type: decimal(10,2)), d (type: int) Reduce Operator Tree: Join Operator condition map: @@ -321,19 +310,22 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col1 (type: decimal(10,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col5, _col7, _col8 + Statistics: Num rows: 10 Data size: 3580 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 3) and (_col8 = 3)) (type: boolean) Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col0 (type: int), _col7 (type: decimal(10,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -341,7 +333,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[4][tables = [test_db_materialized_view_create_rewrite.cmv_mat_view2, cmv_basetable]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from ( (select a, c from cmv_basetable where a = 3) table1 join diff --git ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out index f2b5e8d3fa..496c94308d 100644 --- ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out +++ ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out @@ -24,6 +24,30 @@ values(cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local ti POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tstz1 +PREHOOK: query: EXPLAIN select `__time` from tstz1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select `__time` from tstz1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: 
+ Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp with local time zone) + outputColumnNames: _col0 + ListSink + PREHOOK: query: select `__time` from tstz1 PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 @@ -33,6 +57,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 12:26:34.0 US/Pacific +PREHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + PREHOOK: query: select cast(`__time` as timestamp) from tstz1 PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 @@ -42,6 +90,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 12:26:34 +PREHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["2016-01-03T20:26:34.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + PREHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 @@ -51,6 +123,96 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 12:26:34 +PREHOOK: query: EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a 
root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes int + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'HOUR','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +12 +PREHOOK: query: EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(\"__time\",'PT1H','','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 12:00:00 +PREHOOK: query: EXPLAIN select `__time` from tstz1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select `__time` from tstz1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp with local time zone) + outputColumnNames: _col0 + ListSink + PREHOOK: query: select `__time` from tstz1 PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 @@ -60,6 +222,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 20:26:34.0 UTC +PREHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + PREHOOK: query: select cast(`__time` as timestamp) from tstz1 PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 @@ -69,6 +255,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 20:26:34 +PREHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["2016-01-03T20:26:34.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + PREHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 @@ -78,3 +288,135 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 20:26:34 +PREHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 20:26:34' as timestamp) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 20:26:34' as timestamp) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["2016-01-03T20:26:34.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 20:26:34' as timestamp) +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 20:26:34' as timestamp) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 20:26:34 +PREHOOK: query: EXPLAIN select 
cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) AND `__time` <= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) AND `__time` <= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["2016-01-03T20:26:34.000Z/2016-01-03T20:26:34.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) AND `__time` <= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) AND `__time` <= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 20:26:34 +PREHOOK: query: EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + druid.fieldTypes int + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'HOUR','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(HOUR FROM CAST(`__time` AS timestamp)) FROM tstz1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +20 +PREHOOK: query: EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: tstz1 + properties: + druid.fieldNames vc + 
druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.tstz1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(\"__time\",'PT1H','','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT FLOOR(CAST(`__time` AS timestamp) to HOUR) FROM tstz1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +2016-01-03 20:00:00 diff --git ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out index 19d90082bf..154e5045c7 100644 --- ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out @@ -235,15 +235,24 @@ POSTHOOK: Lineage: druid_partitioned_table.csmallint SIMPLE [(alltypesorc)alltyp POSTHOOK: Lineage: druid_partitioned_table.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] POSTHOOK: Lineage: druid_partitioned_table.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] POSTHOOK: Lineage: druid_partitioned_table.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table +PREHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table PREHOOK: type: QUERY PREHOOK: Input: default@druid_partitioned_table PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table +POSTHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_partitioned_table POSTHOOK: Output: hdfs://### HDFS PATH ### --39590.246 +1408069801800 10992545287 +PREHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table_0 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_partitioned_table_0 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table_0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_partitioned_table_0 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1408069801800 10992545287 PREHOOK: query: SELECT floor_hour(cast(`ctimestamp1` as timestamp with local time zone)) as `__time`, cstring1, cstring2, @@ -412,15 +421,15 @@ SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`, POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@druid_partitioned_table -PREHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table +PREHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table PREHOOK: type: QUERY PREHOOK: Input: default@druid_partitioned_table PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table +POSTHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_partitioned_table POSTHOOK: Output: hdfs://### HDFS PATH ### --46301.883 +2857395071862 
-1661313883124 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE druid_partitioned_table SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, cstring1, @@ -549,15 +558,15 @@ POSTHOOK: query: INSERT OVERWRITE TABLE druid_partitioned_table POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@druid_partitioned_table -PREHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table +PREHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table PREHOOK: type: QUERY PREHOOK: Input: default@druid_partitioned_table PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table +POSTHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_partitioned_table POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_partitioned_table POSTHOOK: Output: hdfs://### HDFS PATH ### --39590.246 +1408069801800 10992545287 PREHOOK: query: CREATE TABLE druid_max_size_partition STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ( @@ -615,15 +624,15 @@ POSTHOOK: Lineage: druid_max_size_partition.csmallint SIMPLE [(alltypesorc)allty POSTHOOK: Lineage: druid_max_size_partition.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] POSTHOOK: Lineage: druid_max_size_partition.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] POSTHOOK: Lineage: druid_max_size_partition.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: SELECT sum(cfloat) FROM druid_max_size_partition +PREHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_max_size_partition PREHOOK: type: QUERY PREHOOK: Input: default@druid_max_size_partition PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT sum(cfloat) FROM druid_max_size_partition +POSTHOOK: query: SELECT sum(cint), sum(cbigint) FROM druid_max_size_partition POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_max_size_partition POSTHOOK: Output: hdfs://### HDFS PATH ### --39590.246 +1408069801800 10992545287 PREHOOK: query: DROP TABLE druid_partitioned_table_0 PREHOOK: type: DROPTABLE PREHOOK: Input: default@druid_partitioned_table_0 diff --git ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out new file mode 100644 index 0000000000..a6fa042de7 --- /dev/null +++ ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -0,0 +1,267 @@ +PREHOOK: query: CREATE TABLE druid_table +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table +POSTHOOK: query: CREATE TABLE druid_table +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM 
alltypesorc where ctimestamp1 IS NOT NULL +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_table +POSTHOOK: Lineage: druid_table.__time EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: druid_table.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: druid_table.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: druid_table.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +POSTHOOK: Lineage: druid_table.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: druid_table.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: druid_table.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: druid_table.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: druid_table.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: druid_table.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] +POSTHOOK: Lineage: druid_table.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +PREHOOK: query: -- MATH AND STRING functions + +SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: -- MATH AND STRING functions + +SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +837 +PREHOOK: query: SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +4838 +PREHOOK: query: SELECT count(*) FROM druid_table WHERE power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT count(*) FROM druid_table WHERE power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +837 +PREHOOK: query: SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ceil(cfloat) > 0 AND 
floor(cdouble) * 2 < 1000 OR ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000 OR ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +19884.64700973034 27373419 14472 8.51628242804E11 851620413654 68151649880 +PREHOOK: query: SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +54430.27000427246 -3740445 51268 1.31919188502E11 131922984948 92160895030 +PREHOOK: query: SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +-2389.5169917345047 27640645 -5707 7.19705549994E11 719697428706 13774723379 +PREHOOK: query: SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE SIN(cdouble) > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) +FROM druid_table WHERE SIN(cdouble) > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: SELECT cstring1 || '_'|| cstring2, substring(cstring2, 2, 3) as concat , upper(cstring2), lower(cstring1), SUM(cdouble) as s FROM druid_table WHERE cstring1 IS NOT NULL AND cstring2 IS NOT NULL AND cstring2 like 'Y%' + GROUP BY cstring1 || '_'|| cstring2, substring(cstring2, 2, 3), upper(cstring2), lower(cstring1) ORDER BY concat DESC LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT cstring1 || '_'|| cstring2, substring(cstring2, 2, 3) as concat , upper(cstring2), lower(cstring1), SUM(cdouble) as s FROM druid_table WHERE cstring1 IS NOT NULL AND 
cstring2 IS NOT NULL AND cstring2 like 'Y%' + GROUP BY cstring1 || '_'|| cstring2, substring(cstring2, 2, 3), upper(cstring2), lower(cstring1) ORDER BY concat DESC LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +8Nj7qpHBTH1GUkMM1BXr2_YyROa06YMuK3C2eg85d yRO YYROA06YMUK3C2EG85D 8nj7qphbth1gukmm1bxr2 0.0 +k7rg3Vw6IpwU6_YyKI8Rb72WP5dP1BMSPoT yKI YYKI8RB72WP5DP1BMSPOT k7rg3vw6ipwu6 0.0 +b0r8g21X6I2TvvPj623IKR_YxSwHWr xSw YXSWHWR b0r8g21x6i2tvvpj623ikr 0.0 +ox4gTH52_YsjDHuPsD2 sjD YSJDHUPSD2 ox4gth52 0.0 +NEGa0N8MJ2dnn3MKAfl6u_Yr4e3n r4e YR4E3N nega0n8mj2dnn3mkafl6u 0.0 +767fOfF1Oj8fyOv6YFI16rM_YqdbA5 qdb YQDBA5 767foff1oj8fyov6yfi16rm 0.0 +kM4k0y1fqwton_YpK3CTDWEXOV pK3 YPK3CTDWEXOV km4k0y1fqwton 0.0 +TBI20Ba2YuO44754E2BM_YpB20i4 pB2 YPB20I4 tbi20ba2yuo44754e2bm 0.0 +jiqEpNs7qXo0y37_Ynnw5opXqf6BU nnw YNNW5OPXQF6BU jiqepns7qxo0y37 0.0 +TgS6dAlI2w4y_Ynh42DscA373RX27nBkft nh4 YNH42DSCA373RX27NBKFT tgs6dali2w4y 0.0 +PREHOOK: query: EXPLAIN SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT count(*) FROM druid_table WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames __time,cstring1,cstring2,cdouble,cfloat,ctinyint,csmallint,cint,cbigint,cboolean1,cboolean2 + druid.fieldTypes timestamp with local time zone,string,string,double,float,tinyint,smallint,int,bigint,boolean,boolean + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"columns":["__time","cstring1","cstring2","cdouble","cfloat","ctinyint","csmallint","cint","cbigint","cboolean1","cboolean2"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 34864 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 11618 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3057 Data size: 11618 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) + FROM druid_table WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000 OR ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint) + FROM druid_table WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000 OR ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table + properties: + druid.fieldNames $f0,_o__c1,_o__c2,_o__c3,$f4,$f5 + druid.fieldTypes double,int,bigint,double,bigint,bigint + druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"and","fields":[{"type":"expression","expression":"(ceil(\"cfloat\") > 0)"},{"type":"expression","expression":"((floor(\"cdouble\") * 2) < 1000)"}]},{"type":"and","fields":[{"type":"expression","expression":"((log(\"cdouble\") / 1.0) > 0)"},{"type":"expression","expression":"(cos(\"cint\") > 0)"}]},{"type":"expression","expression":"(sin(\"cdouble\") > 1)"}]},"aggregations":[{"type":"doubleSum","name":"$f0","expression":"(\"cfloat\" + CAST(1, 'DOUBLE'))"},{"type":"doubleSum","name":"$f1","expression":"(\"cdouble\" + CAST(\"ctinyint\", 'DOUBLE'))"},{"type":"longSum","name":"$f2","fieldName":"ctinyint"},{"type":"longSum","name":"$f3","fieldName":"csmallint"},{"type":"longSum","name":"$f4","fieldName":"cint"},{"type":"longSum","name":"$f5","fieldName":"cbigint"}],"postAggregations":[{"type":"expression","name":"_o__c1","expression":"CAST(\"$f1\", 'LONG')"},{"type":"expression","name":"_o__c2","expression":"(\"$f2\" + 1)"},{"type":"expression","name":"_o__c3","expression":"CAST((\"$f3\" + \"$f4\"), 'DOUBLE')"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: $f0 (type: double), _o__c1 (type: int), _o__c2 (type: bigint), _o__c3 (type: double), $f4 (type: bigint), $f5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: EXPLAIN SELECT cstring1 || '_'|| cstring2, substring(cstring2, 2, 3) as concat , upper(cstring2), lower(cstring1), SUM(cdouble) as s FROM druid_table WHERE cstring1 IS NOT NULL AND cstring2 IS NOT NULL AND cstring2 like 'Y%' + GROUP BY cstring1 || '_'|| cstring2, substring(cstring2, 2, 3), upper(cstring2), lower(cstring1) ORDER BY concat DESC LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cstring1 || '_'|| cstring2, substring(cstring2, 2, 3) as concat , upper(cstring2), 
lower(cstring1), SUM(cdouble) as s FROM druid_table WHERE cstring1 IS NOT NULL AND cstring2 IS NOT NULL AND cstring2 like 'Y%' + GROUP BY cstring1 || '_'|| cstring2, substring(cstring2, 2, 3), upper(cstring2), lower(cstring1) ORDER BY concat DESC LIMIT 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table + properties: + druid.fieldNames vc,vc0,vc1,vc2,$f4 + druid.fieldTypes string,string,string,string,double + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"},{"type":"default","dimension":"vc0","outputName":"vc0","outputType":"STRING"},{"type":"default","dimension":"vc1","outputName":"vc1","outputType":"STRING"},{"type":"default","dimension":"vc2","outputName":"vc2","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"concat(concat(\"cstring1\",'_'),\"cstring2\")","outputType":"STRING"},{"type":"expression","name":"vc0","expression":"substring(\"cstring2\", 1, 3)","outputType":"STRING"},{"type":"expression","name":"vc1","expression":"upper(\"cstring2\")","outputType":"STRING"},{"type":"expression","name":"vc2","expression":"lower(\"cstring1\")","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"vc0","direction":"descending","dimensionOrder":"lexicographic"}]},"filter":{"type":"and","fields":[{"type":"expression","expression":"like(\"cstring2\",'Y%')"},{"type":"not","field":{"type":"selector","dimension":"cstring1","value":null}},{"type":"not","field":{"type":"selector","dimension":"cstring2","value":null}}]},"aggregations":[{"type":"doubleSum","name":"$f4","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: vc (type: string), vc0 (type: string), vc1 (type: string), vc2 (type: string), $f4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + ListSink + +PREHOOK: query: DROP TABLE druid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@druid_table +PREHOOK: Output: default@druid_table +POSTHOOK: query: DROP TABLE druid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: default@druid_table diff --git ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out new file mode 100644 index 0000000000..cf8161f4cb --- /dev/null +++ ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out @@ -0,0 +1,1025 @@ +PREHOOK: query: CREATE TABLE druid_table +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table +POSTHOOK: query: CREATE TABLE druid_table +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS +SELECT cast (`ctimestamp1` as 
timestamp with local time zone) as `__time`, + cstring1, + cstring2, + cdouble, + cfloat, + ctinyint, + csmallint, + cint, + cbigint, + cboolean1, + cboolean2 + FROM alltypesorc where ctimestamp1 IS NOT NULL +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_table +POSTHOOK: Lineage: druid_table.__time EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: druid_table.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: druid_table.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: druid_table.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +POSTHOOK: Lineage: druid_table.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: druid_table.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: druid_table.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: druid_table.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: druid_table.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: druid_table.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] +POSTHOOK: Lineage: druid_table.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +PREHOOK: query: SELECT EXTRACT(SECOND from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(SECOND from `__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(SECOND from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(SECOND from `__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(SECOND from `__time`) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(SECOND from `__time`) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: 
(character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: second(vc) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MINUTE from `__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MINUTE from `__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +59 +PREHOOK: query: EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MINUTE from `__time`) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 
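
A pattern worth spelling out here, since the next several plans repeat it: when a query mixes EXTRACT on `__time` with predicates on other columns, only part of the work reaches Druid. The scan above materializes `__time` as the LONG virtual column `vc`, pushes three of the four predicates down as Druid expression filters, and leaves the rest to Hive. Below is a reader's annotation of that same query from the golden file; the comments are a gloss on the plan shown above, not output produced by the test.

    -- Query from the golden file above, with the fate of each predicate noted.
    EXPLAIN
    SELECT EXTRACT(SECOND FROM `__time`)
    FROM druid_table
    WHERE character_length(CAST(ctinyint AS STRING)) > 1  -- pushed: strlen(CAST("ctinyint", 'STRING')) > 1
      AND char_length(CAST(ctinyint AS STRING)) < 10      -- residual: kept in Hive's Filter Operator
      AND power(cfloat, 2) * pow(csmallint, 3) > 1        -- pushed: (pow("cfloat",2) * pow("csmallint",3)) > 1
      AND SQRT(ABS(ctinyint)) > 3                         -- pushed: sqrt(abs("ctinyint")) > 3
    GROUP BY EXTRACT(SECOND FROM `__time`);
    -- Druid returns __time as virtual column vc (outputType LONG); Hive then
    -- computes second(vc) and runs the GROUP BY in the Tez vertices shown above.
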
AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MINUTE from `__time`) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: minute(vc) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(HOUR from `__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(HOUR from `__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +15 +16 +PREHOOK: query: EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 
AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(HOUR from `__time`) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(HOUR from `__time`) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hour(vc) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT EXTRACT(DAY from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(DAY from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND 
char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +31 +PREHOOK: query: EXPLAIN SELECT EXTRACT(DAY from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(DAY from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: day(vc) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) 
> 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 +PREHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: weekofyear(vc) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT EXTRACT(MONTH from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(MONTH from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +12 +PREHOOK: query: EXPLAIN SELECT EXTRACT(MONTH from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(MONTH from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: month(vc) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3057 Data 
size: 127806 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT EXTRACT(QUARTER from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(QUARTER from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +4 +PREHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: quarter(vc) (type: int) + outputColumnNames: _col0 
+ Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT EXTRACT(YEAR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(YEAR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969 +PREHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 
3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year(vc) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table + properties: + druid.fieldNames vc + druid.fieldTypes int + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"bound","dimension":"__time","lower":"0","lowerStrict":false,"upper":"0","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"s","timeZone":"US/Pacific","locale":"en-US"}},"virtualColumns":[{"type":"expression","name":"vc","expression":"0","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1} + druid.query.type scan + Select Operator + expressions: vc (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +0 +PREHOOK: query: EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE EXTRACT(MINUTE from `__time`) >= 0 LIMIT 2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table +WHERE EXTRACT(MINUTE from `__time`) >= 0 LIMIT 2 +POSTHOOK: type: 
QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table + properties: + druid.fieldNames vc + druid.fieldTypes int + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"bound","dimension":"__time","lower":"0","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"m","timeZone":"US/Pacific","locale":"en-US"}},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'MINUTE','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":2} + druid.query.type scan + Select Operator + expressions: vc (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT EXTRACT(MINUTE from `__time`) as minute FROM druid_table + WHERE EXTRACT(MINUTE from `__time`) >= 0 order by minute LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(MINUTE from `__time`) as minute FROM druid_table + WHERE EXTRACT(MINUTE from `__time`) >= 0 order by minute LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +0 +PREHOOK: query: EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + TableScan + alias: druid_table + filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + properties: + druid.fieldNames vc,ctinyint + druid.fieldTypes timestamp with local time zone,tinyint + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"} + druid.query.type scan + Filter Operator + predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean) + Select Operator + expressions: hour(vc) (type: int) + outputColumnNames: _col0 + Limit + Number of rows: 1 + ListSink + +PREHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table +WHERE character_length(CAST(ctinyint AS 
STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 +AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +15 +PREHOOK: query: EXPLAIN SELECT EXTRACT(DAY from `__time`), EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS day_str +FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(DAY from `__time`), EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS day_str +FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table + properties: + druid.fieldNames vc,vc0,vc1 + druid.fieldTypes int,bigint,string + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 8, 2), 'DOUBLE') == 31)"},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"div(timestamp_extract(\"__time\",'DAY','US/Pacific'),7)","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 8, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} + druid.query.type scan + Select Operator + expressions: vc (type: int), vc0 (type: bigint), vc1 (type: string) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: SELECT EXTRACT(DAY from `__time`) , EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS dar_str +FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(DAY from `__time`) , EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS dar_str +FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +31 4 31 +31 4 31 +PREHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1 +AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1 +AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table + properties: + druid.fieldNames vc + druid.fieldTypes int + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}},{"type":"expression","expression":"((div(timestamp_extract(\"__time\",'WEEK','US/Pacific'),4) + 1) == 1)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'WEEK','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1} + druid.query.type scan + Select Operator + expressions: vc (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1 +AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1 +AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 +1 +PREHOOK: query: EXPLAIN SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table +WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table +WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table + properties: + druid.fieldNames vc,vc0,vc1 + druid.fieldTypes double,int,string + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M","timeZone":"US/Pacific","locale":"en-US"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / CAST(4, 'DOUBLE')) + CAST(1, 'DOUBLE'))","outputType":"DOUBLE"},{"type":"expression","name":"vc0","expression":"timestamp_extract(\"__time\",'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 5, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1} + druid.query.type scan + Select Operator + expressions: vc (type: double), vc0 (type: int), vc1 (type: string) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) 
AS STRING), 6, 2) as month_str FROM druid_table + WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table + WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +4.0 12 12 +4.0 12 12 +PREHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4 + AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4 + AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table + properties: + druid.fieldNames vc,vc0 + druid.fieldTypes int,double + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(timestamp_extract(\"__time\",'QUARTER','US/Pacific') >= 4)"},{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'QUARTER','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / CAST(4, 'DOUBLE')) + CAST(1, 'DOUBLE'))","outputType":"DOUBLE"}],"columns":["vc","vc0"],"resultFormat":"compactedList","limit":1} + druid.query.type scan + Select Operator + expressions: vc (type: int), vc0 (type: double) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4 + AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4 + AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +4 4.0 +4 4.0 +PREHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) AS year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969 +AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) AS year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969 +AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1 +POSTHOOK: type: QUERY +STAGE 
DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc,vc0
+            druid.fieldTypes int,string
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"yyyy","timeZone":"US/Pacific","locale":"en-US"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pacific'), 'STRING') == '1969')"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 0, 4)","outputType":"STRING"}],"columns":["vc","vc0"],"resultFormat":"compactedList","limit":1}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: int), vc0 (type: string)
+            outputColumnNames: _col0, _col1
+            ListSink
+
+PREHOOK: query: SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) as year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969
+AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) as year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969
+AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1969 1969
+1969 1969
+PREHOOK: query: DROP TABLE druid_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: default@druid_table
+POSTHOOK: query: DROP TABLE druid_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: default@druid_table
diff --git ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
new file mode 100644
index 0000000000..0405b7dd87
--- /dev/null
+++ ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
@@ -0,0 +1,1026 @@
+PREHOOK: query: CREATE TABLE druid_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
+       cstring1,
+       cstring2,
+       cdouble,
+       cfloat,
+       ctinyint,
+       csmallint,
+       cint,
+       cbigint,
+       cboolean1,
+       cboolean2
+  FROM alltypesorc where ctimestamp1 IS NOT NULL
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_table
+POSTHOOK: query: CREATE TABLE druid_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
+       cstring1,
+       cstring2,
+       cdouble,
+       cfloat,
+       ctinyint,
+       csmallint,
+       cint,
+       cbigint,
+       cboolean1,
+       cboolean2
+  FROM alltypesorc where ctimestamp1 IS NOT NULL
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_table
+POSTHOOK: Lineage: druid_table.__time EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: druid_table.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: druid_table.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: druid_table.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: druid_table.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: druid_table.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: druid_table.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: druid_table.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: druid_table.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: druid_table.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: druid_table.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: SELECT floor(`__time` to SECOND) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to SECOND)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT floor(`__time` to SECOND) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to SECOND)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1969-12-31 15:59:00.0 US/Pacific
+1969-12-31 16:00:00.0 US/Pacific
+PREHOOK: query: EXPLAIN SELECT floor(`__time` to SECOND) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to SECOND)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT floor(`__time` to SECOND) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to SECOND)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: druid_table
+                  filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                  properties:
+                    druid.fieldNames vc,ctinyint
+                    druid.fieldTypes timestamp with local time zone,tinyint
+                    druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                    Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: floor_second(vc) (type: timestamp with local time zone)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: timestamp with local time zone)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: timestamp with local time zone)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: timestamp with local time zone)
+                          Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: timestamp with local time zone)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT floor(`__time` to MINUTE) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to MINUTE)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT floor(`__time` to MINUTE) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to MINUTE)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1969-12-31 15:59:00.0 US/Pacific
+1969-12-31 16:00:00.0 US/Pacific
+PREHOOK: query: EXPLAIN SELECT floor(`__time` to MINUTE) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to MINUTE)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT floor(`__time` to MINUTE) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to MINUTE)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: druid_table
+                  filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                  properties:
+                    druid.fieldNames vc,ctinyint
+                    druid.fieldTypes timestamp with local time zone,tinyint
+                    druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                    Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: floor_minute(vc) (type: timestamp with local time zone)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: timestamp with local time zone)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: timestamp with local time zone)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: timestamp with local time zone)
+                          Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: timestamp with local time zone)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT floor(`__time` to HOUR) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to HOUR)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT floor(`__time` to HOUR) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to HOUR)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1969-12-31 15:00:00.0 US/Pacific
+1969-12-31 16:00:00.0 US/Pacific
+PREHOOK: query: EXPLAIN SELECT floor(`__time` to HOUR) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to HOUR)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT floor(`__time` to HOUR) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to HOUR)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: druid_table
+                  filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                  properties:
+                    druid.fieldNames vc,ctinyint
+                    druid.fieldTypes timestamp with local time zone,tinyint
+                    druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                    Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: floor_hour(vc) (type: timestamp with local time zone)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: timestamp with local time zone)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: timestamp with local time zone)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: timestamp with local time zone)
+                          Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: timestamp with local time zone)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT EXTRACT(DAY from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(DAY from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+31
+PREHOOK: query: EXPLAIN SELECT EXTRACT(DAY from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(DAY from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: druid_table
+                  filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                  properties:
+                    druid.fieldNames vc,ctinyint
+                    druid.fieldTypes timestamp with local time zone,tinyint
+                    druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                    Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: day(vc) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1
+PREHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: druid_table
+                  filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                  properties:
+                    druid.fieldNames vc,ctinyint
+                    druid.fieldTypes timestamp with local time zone,tinyint
+                    druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                    Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: weekofyear(vc) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT EXTRACT(MONTH from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(MONTH from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+12
+PREHOOK: query: EXPLAIN SELECT EXTRACT(MONTH from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(MONTH from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: druid_table
+                  filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                  properties:
+                    druid.fieldNames vc,ctinyint
+                    druid.fieldTypes timestamp with local time zone,tinyint
+                    druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                    Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: month(vc) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT EXTRACT(QUARTER from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(QUARTER from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+4
+PREHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: druid_table
+                  filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                  properties:
+                    druid.fieldNames vc,ctinyint
+                    druid.fieldTypes timestamp with local time zone,tinyint
+                    druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                    Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: quarter(vc) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT EXTRACT(YEAR from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(YEAR from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1969
+PREHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: druid_table
+                  filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                  properties:
+                    druid.fieldNames vc,ctinyint
+                    druid.fieldTypes timestamp with local time zone,tinyint
+                    druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+                    Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: year(vc) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc
+            druid.fieldTypes int
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"bound","dimension":"__time","lower":"0","lowerStrict":false,"upper":"0","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"s","timeZone":"US/Pacific","locale":"en-US"}},"virtualColumns":[{"type":"expression","name":"vc","expression":"0","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: int)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+0
+PREHOOK: query: EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table
+WHERE EXTRACT(MINUTE from `__time`) >= 0 LIMIT 2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table
+WHERE EXTRACT(MINUTE from `__time`) >= 0 LIMIT 2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc
+            druid.fieldTypes int
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"bound","dimension":"__time","lower":"0","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"m","timeZone":"US/Pacific","locale":"en-US"}},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'MINUTE','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":2}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: int)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: SELECT EXTRACT(MINUTE from `__time`) as minute FROM druid_table
+ WHERE EXTRACT(MINUTE from `__time`) >= 0 order by minute LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(MINUTE from `__time`) as minute FROM druid_table
+ WHERE EXTRACT(MINUTE from `__time`) >= 0 order by minute LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+0
+PREHOOK: query: EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(HOUR from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+          properties:
+            druid.fieldNames vc,ctinyint
+            druid.fieldTypes timestamp with local time zone,tinyint
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+            druid.query.type scan
+          Filter Operator
+            predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
+            Select Operator
+              expressions: hour(vc) (type: int)
+              outputColumnNames: _col0
+              Limit
+                Number of rows: 1
+                ListSink
+
+PREHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table
+WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
+AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+15
+PREHOOK: query: EXPLAIN SELECT EXTRACT(DAY from `__time`), EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS day_str
+FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(DAY from `__time`), EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS day_str
+FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc,vc0,vc1
+            druid.fieldTypes int,bigint,string
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 8, 2), 'DOUBLE') == 31)"},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"div(timestamp_extract(\"__time\",'DAY','US/Pacific'),7)","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 8, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: int), vc0 (type: bigint), vc1 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            ListSink
+
+PREHOOK: query: SELECT EXTRACT(DAY from `__time`) , EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS dar_str
+FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(DAY from `__time`) , EXTRACT(DAY from `__time`) DIV 7 AS WEEK, SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) AS dar_str
+FROM druid_table WHERE SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 9, 2) = 31 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+31 4 31
+31 4 31
+PREHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1
+AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1
+AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc
+            druid.fieldTypes int
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}},{"type":"expression","expression":"((div(timestamp_extract(\"__time\",'WEEK','US/Pacific'),4) + 1) == 1)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'WEEK','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: int)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1
+AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table WHERE EXTRACT(WEEK from `__time`) >= 1
+AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1
+1
+PREHOOK: query: EXPLAIN SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table
+WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table
+WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc,vc0,vc1
+            druid.fieldTypes double,int,string
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M","timeZone":"US/Pacific","locale":"en-US"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / CAST(4, 'DOUBLE')) + CAST(1, 'DOUBLE'))","outputType":"DOUBLE"},{"type":"expression","name":"vc0","expression":"timestamp_extract(\"__time\",'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 5, 2)","outputType":"STRING"}],"columns":["vc","vc0","vc1"],"resultFormat":"compactedList","limit":1}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: double), vc0 (type: int), vc1 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            ListSink
+
+PREHOOK: query: SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table
+ WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table
+ WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+4.0 12 12
+4.0 12 12
+PREHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4
+ AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4
+ AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc,vc0
+            druid.fieldTypes int,double
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(timestamp_extract(\"__time\",'QUARTER','US/Pacific') >= 4)"},{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'QUARTER','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / CAST(4, 'DOUBLE')) + CAST(1, 'DOUBLE'))","outputType":"DOUBLE"}],"columns":["vc","vc0"],"resultFormat":"compactedList","limit":1}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: int), vc0 (type: double)
+            outputColumnNames: _col0, _col1
+            ListSink
+
+PREHOOK: query: SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4
+ AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table WHERE EXTRACT(QUARTER from `__time`) >= 4
+ AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+4 4.0
+4 4.0
+PREHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) AS year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969
+AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) AS year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969
+AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc,vc0
+            druid.fieldTypes int,string
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969","lowerStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"yyyy","timeZone":"US/Pacific","locale":"en-US"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pacific'), 'STRING') == '1969')"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"substring(timestamp_format(timestamp_floor(\"__time\",'P1D','','US/Pacific'),'yyyy-MM-dd','US/Pacific'), 0, 4)","outputType":"STRING"}],"columns":["vc","vc0"],"resultFormat":"compactedList","limit":1}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: int), vc0 (type: string)
+            outputColumnNames: _col0, _col1
+            ListSink
+
+PREHOOK: query: SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) as year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969
+AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) as year_str FROM druid_table WHERE EXTRACT(YEAR from `__time`) >= 1969
+AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1969 1969
+1969 1969
+PREHOOK: query: DROP TABLE druid_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: default@druid_table
+POSTHOOK: query: DROP TABLE druid_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: default@druid_table
diff --git ql/src/test/results/clientpositive/druid/druidmini_mv.q.out ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
index 3ff7b39f13..97f6d84480 100644
--- ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
+++ ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
@@ -5,16 +5,17 @@ AS
 SELECT cast(current_timestamp() AS timestamp) AS t,
        cast(a AS int) AS a,
        cast(b AS varchar(256)) AS b,
+       cast(userid AS varchar(256)) AS userid,
        cast(c AS double) AS c,
        cast(d AS int) AS d
 FROM TABLE (
   VALUES
-    (1, 'alfred', 10.30, 2),
-    (2, 'bob', 3.14, 3),
-    (2, 'bonnie', 172342.2, 3),
-    (3, 'calvin', 978.76, 3),
-    (3, 'charlie', 9.8, 1),
-    (3, 'charlie', 15.8, 1)) as q (a, b, c, d)
+    (1, 'alfred', 'alfred', 10.30, 2),
+    (2, 'bob', 'bob', 3.14, 3),
+    (2, 'bonnie', 'bonnie', 172342.2, 3),
+    (3, 'calvin', 'calvin', 978.76, 3),
+    (3, 'charlie', 'charlie_a', 9.8, 1),
+    (3, 'charlie', 'charlie_b', 15.8, 1)) as q (a, b, userid, c, d)
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: _dummy_database@_dummy_table
 PREHOOK: Output: database:default
@@ -26,16 +27,17 @@ AS
 SELECT cast(current_timestamp() AS timestamp) AS t,
        cast(a AS int) AS a,
        cast(b AS varchar(256)) AS b,
+       cast(userid AS varchar(256)) AS userid,
        cast(c AS double) AS c,
        cast(d AS int) AS d
 FROM TABLE (
   VALUES
-    (1, 'alfred', 10.30, 2),
-    (2, 'bob', 3.14, 3),
-    (2, 'bonnie', 172342.2, 3),
-    (3, 'calvin', 978.76, 3),
-    (3, 'charlie', 9.8, 1),
-    (3, 'charlie', 15.8, 1)) as q (a, b, c, d)
+    (1, 'alfred', 'alfred', 10.30, 2),
+    (2, 'bob', 'bob', 3.14, 3),
+    (2, 'bonnie', 'bonnie', 172342.2, 3),
+    (3, 'calvin', 'calvin', 978.76, 3),
+    (3, 'charlie', 'charlie_a', 9.8, 1),
+    (3, 'charlie', 'charlie_b', 15.8, 1)) as q (a, b, userid, c, d)
 POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: database:default
@@ -45,11 +47,12 @@ POSTHOOK: Lineage: cmv_basetable.b SCRIPT []
 POSTHOOK: Lineage: cmv_basetable.c SCRIPT []
 POSTHOOK: Lineage: cmv_basetable.d SCRIPT []
 POSTHOOK: Lineage: cmv_basetable.t SIMPLE []
+POSTHOOK: Lineage: cmv_basetable.userid SCRIPT []
 PREHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE
 STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
 TBLPROPERTIES ("druid.segment.granularity" = "HOUR")
 AS
-SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c
+SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c, userid
 FROM cmv_basetable
 WHERE a = 2
 PREHOOK: type: CREATE_MATERIALIZED_VIEW
@@ -60,7 +63,7 @@ POSTHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE
 STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
 TBLPROPERTIES ("druid.segment.granularity" = "HOUR")
 AS
-SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c
+SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c, userid
 FROM cmv_basetable
 WHERE a = 2
 POSTHOOK: type: CREATE_MATERIALIZED_VIEW
@@ -94,7 +97,7 @@ PREHOOK: query: CREATE MATERIALIZED VIEW IF NOT EXISTS cmv_mat_view2 ENABLE REWR
 STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
 TBLPROPERTIES ("druid.segment.granularity" = "HOUR")
 AS
-SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c
+SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c, userid
 FROM cmv_basetable
 WHERE a = 3
 PREHOOK: type: CREATE_MATERIALIZED_VIEW
@@ -105,7 +108,7 @@ POSTHOOK: query: CREATE MATERIALIZED VIEW IF NOT EXISTS cmv_mat_view2 ENABLE REW
 STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
 TBLPROPERTIES ("druid.segment.granularity" = "HOUR")
 AS
-SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c
+SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c, userid
 FROM cmv_basetable
 WHERE a = 3
 POSTHOOK: type: CREATE_MATERIALIZED_VIEW
@@ -120,8 +123,9 @@ POSTHOOK: query: SELECT a, c FROM cmv_mat_view2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@cmv_mat_view2
 POSTHOOK: Output: hdfs://### HDFS PATH ###
+3 15.800000190734863
+3 9.800000190734863
 3 978.760009765625
-6 25.600000381469727
 PREHOOK: query: SHOW TBLPROPERTIES cmv_mat_view2
 PREHOOK: type: SHOW_TBLPROPERTIES
 POSTHOOK: query: SHOW TBLPROPERTIES cmv_mat_view2
@@ -285,12 +289,12 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 3 9.8
 3 978.76
 3 978.76
 3 978.76
 PREHOOK: query: INSERT INTO cmv_basetable VALUES
- (cast(current_timestamp() AS timestamp), 3, 'charlie', 15.8, 1)
+ (cast(current_timestamp() AS timestamp), 3, 'charlie', 'charlie_c', 15.8, 1)
 PREHOOK: type: QUERY
 PREHOOK: Input: _dummy_database@_dummy_table
 PREHOOK: Output: default@cmv_basetable
 POSTHOOK: query: INSERT INTO cmv_basetable VALUES
- (cast(current_timestamp() AS timestamp), 3, 'charlie', 15.8, 1)
+ (cast(current_timestamp() AS timestamp), 3, 'charlie', 'charlie_c', 15.8, 1)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@cmv_basetable
@@ -299,6 +303,7 @@ POSTHOOK: Lineage: cmv_basetable.b SCRIPT []
 POSTHOOK: Lineage: cmv_basetable.c SCRIPT []
 POSTHOOK: Lineage: cmv_basetable.d SCRIPT []
 POSTHOOK: Lineage: cmv_basetable.t SCRIPT []
+POSTHOOK: Lineage: cmv_basetable.userid SCRIPT []
 Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT * FROM (
@@ -330,33 +335,33 @@ STAGE PLANS:
           Map Operator Tree:
               TableScan
                 alias: cmv_basetable
-                Statistics: Num rows: 41 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 30 Data size: 360 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                  predicate: (a = 3) (type: boolean)
-                  Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                    expressions: c (type: double)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      sort order: 
-                      Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col0 (type: double)
         Map 3 
            Map Operator Tree:
                TableScan
                  alias: cmv_basetable
-                  Statistics: Num rows: 41 Data size: 640 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 30 Data size: 480 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: ((a = 3) and (d = 3)) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: c (type: double)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
-                        Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: double)
         Reducer 2 
            Reduce Operator Tree:
@@ -367,14 +372,14 @@ STAGE PLANS:
               Merge Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
                   0 
                   1 
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -451,34 +456,34 @@ STAGE PLANS:
           Map Operator Tree:
              TableScan
                alias: cmv_basetable
-                Statistics: Num rows: 41 Data size: 15680 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 30 Data size: 21960 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
                  predicate: (a = 3) (type: boolean)
-                  Statistics: Num rows: 5 Data size: 1912 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
-                    expressions: CAST( t AS timestamp with local time zone) (type: timestamp with local time zone), 3 (type: int), b (type: varchar(256)), c (type: double)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 5 Data size: 1912 Basic stats: COMPLETE Column stats: NONE
+                    expressions: CAST( t AS timestamp with local time zone) (type: timestamp with local time zone), 3 (type: int), b (type: varchar(256)), c (type: double), userid (type: varchar(256))
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                      expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp)
-                      outputColumnNames: _col0, _col1, _col2, _col3, __time_granularity
-                      Statistics: Num rows: 5 Data size: 1912 Basic stats: COMPLETE Column stats: NONE
+                      expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), _col4 (type: varchar(256)), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, __time_granularity
+                      Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: __time_granularity (type: timestamp)
                        sort order: +
                        Map-reduce partition columns: __time_granularity (type: timestamp)
-                        Statistics: Num rows: 5 Data size: 1912 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double)
+                        Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), _col4 (type: varchar(256))
        Reducer 2 
            Reduce Operator Tree:
              Select Operator
-                expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: int), VALUE._col2 (type: varchar(256)), VALUE._col3 (type: double), KEY.__time_granularity (type: timestamp)
-                outputColumnNames: _col0, _col1, _col2, _col3, __time_granularity
-                Statistics: Num rows: 5 Data size: 1912 Basic stats: COMPLETE Column stats: NONE
+                expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: int), VALUE._col2 (type: varchar(256)), VALUE._col3 (type: double), VALUE._col4 (type: varchar(256)), KEY.__time_granularity (type: timestamp)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, __time_granularity
+                Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
-                  Statistics: Num rows: 5 Data size: 1912 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
                      output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
@@ -506,7 +511,7 @@ rawDataSize 0
 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler
 totalSize 0
#### A masked pattern was here ####
-Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[8][tables = [cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT * FROM (
   (SELECT a, c FROM cmv_basetable WHERE a = 3) table1
@@ -536,35 +541,26 @@ STAGE PLANS:
        Map 1 
            Map Operator Tree:
                TableScan
-                  alias: cmv_basetable
-                  Statistics: Num rows: 41 Data size: 480 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (a = 3) (type: boolean)
-                    Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: c (type: double)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: double)
+                  alias: cmv_mat_view2
+                  properties:
+                    druid.fieldNames vc,a,b,c,userid
+                    druid.fieldTypes timestamp with local time zone,int,varchar(256),double,varchar(256)
+                    druid.query.json {"queryType":"scan","dataSource":"default.cmv_mat_view2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","a","b","c","userid"],"resultFormat":"compactedList"}
+                    druid.query.type scan
+                  Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: a (type: int), c (type: double)
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: cmv_basetable
-                  Statistics: Num rows: 41 Data size: 640 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((a = 3) and (d = 3)) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: c (type: double)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: double)
+                  Statistics: Num rows: 30 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 30 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: a (type: int), c (type: double), d (type: int)
        Reducer 2 
            Reduce Operator Tree:
              Merge Join Operator
@@ -573,19 +569,22 @@ STAGE PLANS:
                keys:
                  0 
                  1 
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 5 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col1, _col3, _col9, _col12, _col13 + Statistics: Num rows: 90 Data size: 2610 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = 3) and (_col9 = 3)) (type: boolean) + Statistics: Num rows: 22 Data size: 638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col3 (type: double), _col1 (type: int), _col12 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 22 Data size: 638 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 638 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -593,7 +592,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[8][tables = [cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN @@ -601,6 +600,7 @@ PREHOOK: query: SELECT * FROM ( ON table1.a = table2.a) PREHOOK: type: QUERY PREHOOK: Input: default@cmv_basetable +PREHOOK: Input: default@cmv_mat_view2 PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 @@ -609,11 +609,12 @@ POSTHOOK: query: SELECT * FROM ( ON table1.a = table2.a) POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Input: default@cmv_mat_view2 POSTHOOK: Output: hdfs://### HDFS PATH ### -3 15.8 3 978.76 -3 15.8 3 978.76 -3 9.8 3 978.76 -3 978.76 3 978.76 +3 15.800000190734863 3 978.76 +3 15.800000190734863 3 978.76 +3 9.800000190734863 3 978.76 +3 978.760009765625 3 978.76 PREHOOK: query: DROP MATERIALIZED VIEW cmv_mat_view PREHOOK: type: DROP_MATERIALIZED_VIEW PREHOOK: Input: default@cmv_mat_view diff --git ql/src/test/results/clientpositive/druid/druidmini_test1.q.out ql/src/test/results/clientpositive/druid/druidmini_test1.q.out index aa68f48e14..34cccef8e2 100644 --- ql/src/test/results/clientpositive/druid/druidmini_test1.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_test1.q.out @@ -64,7 +64,9 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Select Operator expressions: $f0 (type: bigint) @@ -97,10 +99,12 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json 
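Note on the hunks above: the rewritten second EXPLAIN answers the join from the Druid-backed materialized view, and the TableScan over cmv_mat_view2 now carries two properties, druid.fieldNames and druid.fieldTypes, that spell out the scan's output schema instead of leaving the reader to infer it from druid.query.json. The float columns also come back at full double precision (15.800000190734863 rather than 15.8), consistent with Druid returning doubles. Below is a minimal sketch of how the two comma-separated properties pair up into a schema; the class and helper names are illustrative, not Hive's actual serde code:

    import java.util.ArrayList;
    import java.util.List;

    public final class DruidFieldProps {

      // Split on top-level commas only, so parameterized types such as
      // "varchar(256)" or "decimal(10,2)" survive as single tokens.
      static List<String> splitTopLevel(String s) {
        List<String> out = new ArrayList<>();
        int depth = 0, start = 0;
        for (int i = 0; i < s.length(); i++) {
          char c = s.charAt(i);
          if (c == '(') depth++;
          else if (c == ')') depth--;
          else if (c == ',' && depth == 0) {
            out.add(s.substring(start, i));
            start = i + 1;
          }
        }
        out.add(s.substring(start));
        return out;
      }

      public static void main(String[] args) {
        // Values copied from the cmv_mat_view2 plan above.
        List<String> names = splitTopLevel("vc,a,b,c,userid");
        List<String> types = splitTopLevel(
            "timestamp with local time zone,int,varchar(256),double,varchar(256)");
        for (int i = 0; i < names.size(); i++) {
          System.out.println(names.get(i) + " : " + types.get(i));
        }
      }
    }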
{"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"year","aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cfloat"},{"type":"doubleSum","name":"$f2","fieldName":"cdouble"},{"type":"longSum","name":"$f3","fieldName":"ctinyint"},{"type":"longSum","name":"$f4","fieldName":"csmallint"},{"type":"longSum","name":"$f5","fieldName":"cint"},{"type":"longSum","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames timestamp,$f1,$f2,$f3,$f4,$f5,$f6 + druid.fieldTypes timestamp with local time zone,double,double,bigint,bigint,bigint,bigint + druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cfloat"},{"type":"doubleSum","name":"$f2","fieldName":"cdouble"},{"type":"longSum","name":"$f3","fieldName":"ctinyint"},{"type":"longSum","name":"$f4","fieldName":"csmallint"},{"type":"longSum","name":"$f5","fieldName":"cint"},{"type":"longSum","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Select Operator - expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float), $f3 (type: bigint), $f4 (type: bigint), $f5 (type: bigint), $f6 (type: bigint) + expressions: timestamp (type: timestamp with local time zone), $f1 (type: double), $f2 (type: double), $f3 (type: bigint), $f4 (type: bigint), $f5 (type: bigint), $f6 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 ListSink @@ -114,8 +118,7 @@ FROM druid_table GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table POSTHOOK: Output: hdfs://### HDFS PATH ### -1968-12-31 16:00:00.0 US/Pacific -4532.57 3660538.8 -4611 3658030 688783835691 8060200254 -1969-12-31 16:00:00.0 US/Pacific -35057.676 2.3648124E7 -35356 4123059 719285966109 2932345033 +1969-01-01 00:00:00.0 US/Pacific -39590.24694168568 2.7308662809692383E7 -39967 7781089 1408069801800 10992545287 PREHOOK: query: EXPLAIN SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) FROM druid_table GROUP BY floor_year(`__time`) PREHOOK: type: QUERY @@ -133,10 +136,12 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"year","aggregations":[{"type":"doubleMin","name":"$f1","fieldName":"cfloat"},{"type":"doubleMin","name":"$f2","fieldName":"cdouble"},{"type":"longMin","name":"$f3","fieldName":"ctinyint"},{"type":"longMin","name":"$f4","fieldName":"csmallint"},{"type":"longMin","name":"$f5","fieldName":"cint"},{"type":"longMin","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames timestamp,$f1,$f2,$f3,$f4,$f5,$f6 + druid.fieldTypes timestamp with local time zone,float,double,tinyint,smallint,int,bigint + druid.query.json 
{"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMin","name":"$f1","fieldName":"cfloat"},{"type":"doubleMin","name":"$f2","fieldName":"cdouble"},{"type":"longMin","name":"$f3","fieldName":"ctinyint"},{"type":"longMin","name":"$f4","fieldName":"csmallint"},{"type":"longMin","name":"$f5","fieldName":"cint"},{"type":"longMin","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Select Operator - expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float), $f3 (type: bigint), $f4 (type: bigint), $f5 (type: bigint), $f6 (type: bigint) + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double), $f3 (type: tinyint), $f4 (type: smallint), $f5 (type: int), $f6 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 ListSink @@ -150,8 +155,7 @@ FROM druid_table GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table POSTHOOK: Output: hdfs://### HDFS PATH ### -1968-12-31 16:00:00.0 US/Pacific -1790.778 -308691.84 -1790 -313425 0 -8577981133 -1969-12-31 16:00:00.0 US/Pacific -964.719 -287404.84 -1051 -292138 -1073279343 -2147311592 +1969-01-01 00:00:00.0 US/Pacific -1790.778 -308691.84375 2 14255 -1073279343 -8577981133 PREHOOK: query: EXPLAIN SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) FROM druid_table GROUP BY floor_year(`__time`) PREHOOK: type: QUERY @@ -169,10 +173,12 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"year","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"cfloat"},{"type":"doubleMax","name":"$f2","fieldName":"cdouble"},{"type":"longMax","name":"$f3","fieldName":"ctinyint"},{"type":"longMax","name":"$f4","fieldName":"csmallint"},{"type":"longMax","name":"$f5","fieldName":"cint"},{"type":"longMax","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames timestamp,$f1,$f2,$f3,$f4,$f5,$f6 + druid.fieldTypes timestamp with local time zone,float,double,tinyint,smallint,int,bigint + druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"cfloat"},{"type":"doubleMax","name":"$f2","fieldName":"cdouble"},{"type":"longMax","name":"$f3","fieldName":"ctinyint"},{"type":"longMax","name":"$f4","fieldName":"csmallint"},{"type":"longMax","name":"$f5","fieldName":"cint"},{"type":"longMax","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Select Operator - expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float), $f3 (type: bigint), $f4 (type: bigint), $f5 (type: bigint), $f6 (type: bigint) + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double), $f3 (type: tinyint), $f4 (type: smallint), $f5 (type: int), $f6 (type: bigint) outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6 ListSink @@ -186,8 +192,7 @@ FROM druid_table GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table POSTHOOK: Output: hdfs://### HDFS PATH ### -1968-12-31 16:00:00.0 US/Pacific 62.0 57235.0 62 57235 314088763179 2144274348 -1969-12-31 16:00:00.0 US/Pacific 769.164 1.9565518E7 723 57435 319104152611 4923772860 +1969-01-01 00:00:00.0 US/Pacific 769.164 1.9565518E7 -45 -8101 1276572707 4923772860 PREHOOK: query: EXPLAIN SELECT cstring1, SUM(cdouble) as s FROM druid_table GROUP BY cstring1 ORDER BY s ASC LIMIT 10 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT cstring1, SUM(cdouble) as s FROM druid_table GROUP BY cstring1 ORDER BY s ASC LIMIT 10 @@ -203,10 +208,12 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"cstring1"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f1","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames cstring1,$f1 + druid.fieldTypes string,double + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f1","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator - expressions: cstring1 (type: string), $f1 (type: float) + expressions: cstring1 (type: string), $f1 (type: double) outputColumnNames: _col0, _col1 ListSink @@ -218,8 +225,8 @@ POSTHOOK: query: SELECT cstring1, SUM(cdouble) as s FROM druid_table GROUP BY cs POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table POSTHOOK: Output: hdfs://### HDFS PATH ### -1cGVWH7n1QU -596096.7 -821UdmGbkEf4j -14161.827 +1cGVWH7n1QU -596096.6875 +821UdmGbkEf4j -14161.827026367188 00iT08 0.0 02v8WnLuYDos3Cq 0.0 yv1js 0.0 @@ -243,10 +250,12 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"cstring2"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"cstring2","direction":"ascending","dimensionOrder":"alphanumeric"}]},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames cstring2,$f1 + druid.fieldTypes string,double + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"cstring2","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator - expressions: cstring2 (type: string), $f1 (type: float) + expressions: cstring2 (type: string), $f1 (type: double) outputColumnNames: _col0, _col1 ListSink 
@@ -259,15 +268,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table POSTHOOK: Output: hdfs://### HDFS PATH ### NULL 1.9565518E7 -0AAE3daA78MISbsRsHJrp2PI 0.0 -0amu3m60U20Xa3 -200.0 -0aO3Lwer 0.0 -0aQBRP67JY0gpi 15601.0 -0b1WvXy 0.0 -0b03cuG3B4ASx4es1411336I -7196.0 -0B5S310g 0.0 -0bffMd8KSbW32A8A5 0.0 -0bke07kBhD1s33AV3R1X7j7j 0.0 +0034fkcXMQI3 15601.0 +004J8y 0.0 +00GNm -200.0 +00GW4dnb6Wgj52 -200.0 +00PBhB1Iefgk 0.0 +00d5kr1wEB7evExG 15601.0 +00qccwt8n 0.0 +017fFeQ3Gcsa83Xj2Vo0 0.0 +01EfkvNk6mjG44uxs 0.0 PREHOOK: query: EXPLAIN SELECT `__time` FROM druid_table ORDER BY `__time` ASC LIMIT 10 @@ -293,11 +302,13 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -375,11 +386,13 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1900-01-01T00:00:00.000Z/1970-03-01T08:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/1970-03-01T08:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -459,11 +472,13 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-03-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table","intervals":["1968-01-01T08:00:00.000Z/1970-03-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -545,11 +560,13 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-03-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1968-01-01T08:00:00.000Z/1970-03-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -631,11 +648,13 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-01-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1968-01-01T08:00:00.000Z/1970-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -717,11 +736,13 @@ STAGE PLANS: TableScan alias: druid_table properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-04-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table","intervals":["1968-01-01T08:00:00.000Z/1970-04-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/druid_basic2.q.out ql/src/test/results/clientpositive/druid_basic2.q.out index eb2b83fec4..5ed039b2d6 100644 --- ql/src/test/results/clientpositive/druid_basic2.q.out +++ ql/src/test/results/clientpositive/druid_basic2.q.out @@ -76,8 +76,10 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames robot + druid.fieldTypes string + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"columns":["robot"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Select Operator @@ -103,8 +105,10 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":["delta"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames delta + druid.fieldTypes float + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"columns":["delta"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Select Operator @@ -134,8 +138,10 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames robot + druid.fieldTypes string + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"columns":["robot"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Select Operator @@ -165,7 +171,9 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"}],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot + druid.fieldTypes string + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -210,8 +218,10 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"dimensions":["robot","language"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames robot,language + druid.fieldTypes string,string + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["robot","language"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Select Operator @@ -230,8 +240,10 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"dimensions":["language"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames language + druid.fieldTypes string + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["language"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Reduce Output Operator @@ -259,8 +271,10 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"dimensions":["robot","language"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames robot,language + druid.fieldTypes string,string + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["robot","language"],"resultFormat":"compactedList"} + druid.query.type scan #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -285,8 +299,10 @@ STAGE PLANS: columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer' columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"dimensions":["language"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames language + druid.fieldTypes string + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["language"],"resultFormat":"compactedList"} + druid.query.type scan #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -311,10 +327,10 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 language (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -346,7 +362,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[5][tables = [druid_table_1, $hdt$_0]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[5][tables = [$hdt$_0, druid_table_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT a.robot, b.language FROM @@ -384,31 +400,37 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE 
 PREHOOK: query: EXPLAIN EXTENDED
 SELECT a.robot, b.language
 FROM
@@ -384,31 +400,37 @@ STAGE PLANS:
           TableScan
             alias: druid_table_1
             properties:
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-              druid.query.type select
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              druid.fieldNames robot
+              druid.fieldTypes string
+              druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"columns":["robot"],"resultFormat":"compactedList"}
+              druid.query.type scan
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             GatherStats: false
             Select Operator
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              expressions: robot (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Reduce Output Operator
                 null sort order: 
                 sort order: 
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                tag: 1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                tag: 0
+                value expressions: _col0 (type: string)
                 auto parallelism: false
           TableScan
             alias: druid_table_1
             properties:
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-              druid.query.type select
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              druid.fieldNames vc
+              druid.fieldTypes string
+              druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"virtualColumns":[{"type":"expression","name":"vc","expression":"'en'","outputType":"STRING"}],"columns":["vc"],"resultFormat":"compactedList"}
+              druid.query.type scan
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
             GatherStats: false
             Reduce Output Operator
               null sort order: 
               sort order: 
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              tag: 0
-              value expressions: robot (type: string)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              tag: 1
               auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
@@ -427,8 +449,10 @@ STAGE PLANS:
               columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
               columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
               druid.datasource wikipedia
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-              druid.query.type select
+              druid.fieldNames robot
+              druid.fieldTypes string
+              druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"columns":["robot"],"resultFormat":"compactedList"}
+              druid.query.type scan
 #### A masked pattern was here ####
               name default.druid_table_1
               numFiles 0
@@ -453,8 +477,10 @@ STAGE PLANS:
               columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
               columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
               druid.datasource wikipedia
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-              druid.query.type select
+              druid.fieldNames vc
+              druid.fieldTypes string
+              druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"virtualColumns":[{"type":"expression","name":"vc","expression":"'en'","outputType":"STRING"}],"columns":["vc"],"resultFormat":"compactedList"}
+              druid.query.type scan
 #### A masked pattern was here ####
               name default.druid_table_1
               numFiles 0
@@ -479,10 +505,10 @@ STAGE PLANS:
             keys:
               0 
               1 
-          outputColumnNames: _col1
+          outputColumnNames: _col0
           Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: _col1 (type: string), 'en' (type: string)
+            expressions: _col0 (type: string), 'en' (type: string)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
             File Output Operator
@@ -529,132 +555,27 @@ ORDER BY CAST(robot AS INTEGER) ASC, m DESC
 LIMIT 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage

 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: druid_table_1
-            properties:
-              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"default","dimension":"language"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":"day","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type groupBy
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            GatherStats: false
-            Select Operator
-              expressions: robot (type: string), floor_day (type: timestamp with local time zone), $f3 (type: float), $f4 (type: float), UDFToInteger(robot) (type: int)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col4 (type: int), _col2 (type: float)
-                null sort order: az
-                sort order: +-
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                tag: -1
-                TopN: 10
-                TopN Hash Memory Usage: 0.1
-                value expressions: _col0 (type: string), _col1 (type: timestamp with local time zone), _col3 (type: float)
-                auto parallelism: false
-      Path -> Alias:
-#### A masked pattern was here ####
-      Path -> Partition:
-#### A masked pattern was here ####
-          Partition
-            base file name: druid_table_1
-            input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
-            output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
-            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
-              EXTERNAL TRUE
-              bucket_count -1
-              column.name.delimiter ,
-              columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
-              columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
-              columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
-              druid.datasource wikipedia
-              druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"default","dimension":"language"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":"day","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type groupBy
-#### A masked pattern was here ####
-              name default.druid_table_1
-              numFiles 0
-              numRows 0
-              rawDataSize 0
-              serialization.ddl struct druid_table_1 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
-              serialization.format 1
-              serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
-              storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
-              totalSize 0
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
-          
-              input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
-              output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
-              properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
-                EXTERNAL TRUE
-                bucket_count -1
-                column.name.delimiter ,
-                columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
-                columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
-                columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
-                druid.datasource wikipedia
-                druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"default","dimension":"language"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":"day","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-                druid.query.type groupBy
-#### A masked pattern was here ####
-                name default.druid_table_1
-                numFiles 0
-                numRows 0
-                rawDataSize 0
-                serialization.ddl struct druid_table_1 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
-                storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
-                totalSize 0
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
-            name: default.druid_table_1
-      name: default.druid_table_1
-      Truncated Path -> Alias:
-        /druid_table_1 [$hdt$_0:druid_table_1]
-      Needs Tagging: false
-      Reduce Operator Tree:
-        Select Operator
-          expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp with local time zone), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: float)
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Limit
-            Number of rows: 10
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-#### A masked pattern was here ####
-              NumFilesPerFileSink: 1
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-#### A masked pattern was here ####
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  properties:
-                    columns _col0,_col1,_col2,_col3
-                    columns.types string:timestamp with local time zone:float:float
-                    escape.delim \
-                    hive.serialization.extend.additional.nesting.levels true
-                    serialization.escape.crlf true
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              TotalFiles: 1
-              GatherStats: false
-              MultiFileSpray: false
-
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        ListSink
+        TableScan
+          alias: druid_table_1
+          properties:
+            druid.fieldNames robot,floor_day,$f3,$f4,(tok_function tok_int (tok_table_or_col robot))
+            druid.fieldTypes string,timestamp with local time zone,float,double,int
+            druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"(tok_function tok_int (tok_table_or_col robot))","direction":"ascending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"(tok_function tok_int (tok_table_or_col robot))","expression":"CAST(\"robot\", 'LONG')"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+            druid.query.type groupBy
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          GatherStats: false
+          Select Operator
+            expressions: robot (type: string), floor_day (type: timestamp with local time zone), $f3 (type: float), $f4 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          ListSink
 PREHOOK: query: EXPLAIN
 SELECT substring(namespace, CAST(deleted AS INT), 4)
@@ -665,36 +586,26 @@ SELECT substring(namespace, CAST(deleted AS INT), 4)
 FROM druid_table_1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage

 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: druid_table_1
-            properties:
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["namespace"],"metrics":["deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-              druid.query.type select
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            Select Operator
-              expressions: substring(namespace, UDFToInteger(deleted), 4) (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        ListSink
+        TableScan
+          alias: druid_table_1
+          properties:
+            druid.fieldNames vc
+            druid.fieldTypes string
+            druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"substring(\"namespace\", (CAST(\"deleted\", 'LONG') - 1), 4)","outputType":"STRING"}],"columns":["vc"],"resultFormat":"compactedList"}
+            druid.query.type scan
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: vc (type: string)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          ListSink
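One detail worth noticing in the pushed-down virtual column above: Hive's substring() start index is 1-based, while Druid's expression language indexes from 0, so the translated expression subtracts one, yielding substring("namespace", (CAST("deleted", 'LONG') - 1), 4). A tiny sketch of that adjustment; the helper name is hypothetical, shown only for illustration:

    public final class SubstringPushdown {
      // Hive: substring(col, start, len) with a 1-based start.
      // Druid expressions index from 0, hence the "- 1".
      static String toDruidExpr(String column, String startExpr, int length) {
        return "substring(\"" + column + "\", (" + startExpr + " - 1), " + length + ")";
      }

      public static void main(String[] args) {
        System.out.println(toDruidExpr("namespace", "CAST(\"deleted\", 'LONG')", 4));
      }
    }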
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames robot,floor_day + druid.fieldTypes string,timestamp with local time zone + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: robot (type: string), floor_day (type: timestamp with local time zone) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT robot, `__time` @@ -808,54 +662,26 @@ ORDER BY robot LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - filterExpr: floor_day(extract) BETWEEN TIMESTAMPLOCALTZ'1999-11-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'1999-11-10 00:00:00.0 US/Pacific' (type: boolean) - properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"robot"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: floor_day(extract) BETWEEN TIMESTAMPLOCALTZ'1999-11-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'1999-11-10 00:00:00.0 US/Pacific' (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: robot (type: string), extract (type: timestamp with local time zone) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: timestamp with local time zone) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: timestamp with local time zone) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames extract,robot + druid.fieldTypes timestamp with local time zone,string + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: robot (type: string), extract (type: timestamp with local time zone) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT robot, floor_day(`__time`) @@ -884,7 +710,9 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":"day","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"alphanumeric"}]},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} + druid.fieldNames robot,floor_day + druid.fieldTypes string,timestamp with local time zone + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/druid_basic3.q.out ql/src/test/results/clientpositive/druid_basic3.q.out index 9c4cbb55f9..ddce42e5e5 100644 --- ql/src/test/results/clientpositive/druid_basic3.q.out +++ ql/src/test/results/clientpositive/druid_basic3.q.out @@ -33,11 +33,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default","columns":[{"dimension":"postagg#0","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"arithmetic","name":"postagg#0","fn":"+","fields":[{"type":"fieldAccess","name":"","fieldName":"$f1"},{"type":"fieldAccess","name":"","fieldName":"$f2"}]}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames a,language + druid.fieldTypes double,string + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" + \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: postagg#0 (type: float), language (type: string) + expressions: a (type: double), language (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -65,11 +67,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default","columns":[{"dimension":"postagg#0","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"delta"},{"type":"doubleSum","name":"$f2","fieldName":"added"}],"postAggregations":[{"type":"arithmetic","name":"postagg#0","fn":"+","fields":[{"type":"fieldAccess","name":"","fieldName":"$f2"},{"type":"fieldAccess","name":"","fieldName":"$f1"}]}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames $f1,a,language + druid.fieldTypes double,double,string + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"delta"},{"type":"doubleSum","name":"$f2","fieldName":"added"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f2\" + \"$f1\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: $f1 (type: float), postagg#0 (type: float), language (type: string) + expressions: $f1 (type: double), a (type: double), language (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -97,11 +101,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default","columns":[{"dimension":"postagg#0","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"arithmetic","name":"postagg#0","fn":"quotient","fields":[{"type":"fieldAccess","name":"","fieldName":"$f1"},{"type":"fieldAccess","name":"","fieldName":"$f2"}]}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames language,a + druid.fieldTypes string,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" / \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: language (type: string), postagg#0 (type: float) + expressions: language (type: string), a (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -129,11 +135,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default","columns":[{"dimension":"postagg#0","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"arithmetic","name":"postagg#0","fn":"*","fields":[{"type":"fieldAccess","name":"","fieldName":"$f1"},{"type":"fieldAccess","name":"","fieldName":"$f2"}]}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames language,a + druid.fieldTypes string,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" * \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: language (type: string), postagg#0 (type: float) + expressions: language (type: string), a (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -161,11 +169,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default","columns":[{"dimension":"postagg#0","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"arithmetic","name":"postagg#0","fn":"-","fields":[{"type":"fieldAccess","name":"","fieldName":"$f1"},{"type":"fieldAccess","name":"","fieldName":"$f2"}]}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames language,a + druid.fieldTypes string,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" - \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: language (type: string), postagg#0 (type: float) + expressions: language (type: string), a (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -183,46 +193,26 @@ GROUP BY language ORDER BY a DESC POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), ($f1 + 100.0D) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - sort order: - - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames language,a + druid.fieldTypes string,double + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" + CAST(100, 'DOUBLE'))"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: language (type: string), a (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT language, -1 * (a + b) AS c @@ -241,46 +231,26 @@ FROM ( ORDER BY c DESC POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"},{"type":"count","name":"$f3"},{"type":"doubleSum","name":"$f4","fieldName":"deleted"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), (-1.0D * ((($f1 - $f2) / UDFToDouble(($f3 * 3L))) + $f4)) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - sort order: - - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames language,c + druid.fieldTypes string,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"c","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"},{"type":"count","name":"$f3"},{"type":"doubleSum","name":"$f4","fieldName":"deleted"}],"postAggregations":[{"type":"expression","name":"c","expression":"(-1.0 * (((\"$f1\" - \"$f2\") / CAST((\"$f3\" * 
3), 'DOUBLE')) + \"$f4\"))"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: language (type: string), c (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT language, robot, sum(added) - sum(delta) AS a @@ -305,11 +275,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"default","dimension":"language"}],"limitSpec":{"type":"default"},"filter":{"type":"in","dimension":"__time","values":["10","11"],"extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}},"aggregations":[{"type":"doubleSum","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot,language,$f2,$f3 + druid.fieldTypes string,string,double,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default"},"filter":{"type":"in","dimension":"__time","values":["10","11"],"extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}},"aggregations":[{"type":"doubleSum","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: language (type: string), robot (type: string), ($f2 - $f3) (type: float) + expressions: language (type: string), robot (type: string), ($f2 - $f3) (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -339,46 +311,26 @@ GROUP BY language ORDER BY a DESC POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"delta"},{"type":"count","name":"$f2"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), ($f1 / UDFToDouble($f2)) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - sort order: - - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), 
KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames language,a + druid.fieldTypes string,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"delta"},{"type":"count","name":"$f2"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" / CAST(\"$f2\", 'DOUBLE'))"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: language (type: string), a (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT language, sum(added) / sum(delta) AS a, @@ -395,36 +347,26 @@ GROUP BY language ORDER BY a DESC POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default","columns":[{"dimension":"postagg#0","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"},{"type":"doubleSum","name":"$f3","fieldName":"deleted"}],"postAggregations":[{"type":"arithmetic","name":"postagg#0","fn":"quotient","fields":[{"type":"fieldAccess","name":"","fieldName":"$f1"},{"type":"fieldAccess","name":"","fieldName":"$f2"}]}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), postagg#0 (type: float), CASE WHEN (($f3 = 0.0)) THEN (1) ELSE ($f3) END (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames language,a,b + druid.fieldTypes string,double,double + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"},{"type":"doubleSum","name":"$f3","fieldName":"deleted"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" / \"$f2\")"},{"type":"expression","name":"b","expression":"case_searched((\"$f3\" == 0.0),1,\"$f3\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: language (type: string), a (type: double), b (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT language, a, a - b as c @@ -443,46 +385,26 @@ FROM ( ORDER BY a DESC POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), ($f1 + 100.0D) (type: double), (($f1 + 100.0D) - $f2) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - sort order: - - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: double) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double), VALUE._col1 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames language,a,c + druid.fieldTypes string,double,double + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"a","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f1\" + CAST(100, 'DOUBLE'))"},{"type":"expression","name":"c","expression":"((\"$f1\" + CAST(100, 'DOUBLE')) - \"$f2\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: language (type: string), a (type: double), c (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT language, robot, "A" @@ -513,7 +435,9 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"default","dimension":"language"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot,language + druid.fieldTypes string,string + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -554,7 +478,9 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"default","dimension":"language"}],"limitSpec":{"type":"default","limit":5,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"alphanumeric"},{"dimension":"language","direction":"ascending","dimensionOrder":"alphanumeric"}]},"aggregations":[{"type":"longSum","name":"dummy_agg","fieldName":"dummy_agg"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot,language + druid.fieldTypes string,string + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","limit":5,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"},{"dimension":"language","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/druid_intervals.q.out 
ql/src/test/results/clientpositive/druid_intervals.q.out index 0a0b1d31dc..03b07060ba 100644 --- ql/src/test/results/clientpositive/druid_intervals.q.out +++ ql/src/test/results/clientpositive/druid_intervals.q.out @@ -78,11 +78,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -108,11 +110,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/2012-03-01T08:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/2012-03-01T08:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -138,11 +142,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000Z/2012-03-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select + druid.fieldNames vc + druid.fieldTypes timestamp with local time zone + druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2012-03-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone) + expressions: vc (type: timestamp with local time zone) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -170,11 +176,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: 
-            druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-            druid.query.type select
+            druid.fieldNames vc
+            druid.fieldTypes timestamp with local time zone
+            druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
+            druid.query.type scan
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone)
+            expressions: vc (type: timestamp with local time zone)
             outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -200,11 +208,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-            druid.query.type select
+            druid.fieldNames vc
+            druid.fieldTypes timestamp with local time zone
+            druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
+            druid.query.type scan
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone)
+            expressions: vc (type: timestamp with local time zone)
             outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -232,11 +242,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z","2012-01-01T08:00:00.000Z/2013-01-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-            druid.query.type select
+            druid.fieldNames vc
+            druid.fieldTypes timestamp with local time zone
+            druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z","2012-01-01T08:00:00.000Z/2013-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
+            druid.query.type scan
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone)
+            expressions: vc (type: timestamp with local time zone)
             outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -264,11 +276,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000Z/2012-01-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-            druid.query.type select
+            druid.fieldNames vc
+            druid.fieldTypes timestamp with local time zone
+            druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2012-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
+            druid.query.type scan
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone)
+            expressions: vc (type: timestamp with local time zone)
             outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -294,11 +308,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000Z/2010-01-01T08:00:00.001Z","2011-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-            druid.query.type select
+            druid.fieldNames vc
+            druid.fieldTypes timestamp with local time zone
+            druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2010-01-01T08:00:00.001Z","2011-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
+            druid.query.type scan
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone)
+            expressions: vc (type: timestamp with local time zone)
             outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -324,11 +340,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T08:00:00.000Z/2010-01-01T08:00:00.001Z","2011-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"filter":{"type":"selector","dimension":"robot","value":"user1"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-            druid.query.type select
+            druid.fieldNames vc,vc0
+            druid.fieldTypes timestamp with local time zone,string
+            druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["2010-01-01T08:00:00.000Z/2010-01-01T08:00:00.001Z","2011-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z"],"filter":{"type":"selector","dimension":"robot","value":"user1"},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"'user1'","outputType":"STRING"}],"columns":["vc","vc0"],"resultFormat":"compactedList"}
+            druid.query.type scan
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone), 'user1' (type: string)
+            expressions: vc (type: timestamp with local time zone), vc0 (type: string)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -344,38 +362,24 @@ FROM druid_table_1
 WHERE robot = 'user1' OR `__time` IN ('2010-01-01 00:00:00','2011-01-01 00:00:00')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: druid_table_1
-            filterExpr: ((__time) IN (TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific', TIMESTAMPLOCALTZ'2011-01-01 00:00:00.0 US/Pacific') or (robot = 'user1')) (type: boolean)
-            properties:
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-              druid.query.type select
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            Filter Operator
-              predicate: ((__time) IN (TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific', TIMESTAMPLOCALTZ'2011-01-01 00:00:00.0 US/Pacific') or (robot = 'user1')) (type: boolean)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              Select Operator
-                expressions: __time (type: timestamp with local time zone), robot (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        ListSink
+        TableScan
+          alias: druid_table_1
+          properties:
+            druid.fieldNames vc,robot
+            druid.fieldTypes timestamp with local time zone,string
+            druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"in","dimension":"__time","values":["2010-01-01T00:00:00.000Z","2011-01-01T00:00:00.000Z"],"extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"selector","dimension":"robot","value":"user1"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","robot"],"resultFormat":"compactedList"}
+            druid.query.type scan
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: vc (type: timestamp with local time zone), robot (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            ListSink
diff --git ql/src/test/results/clientpositive/druid_timeseries.q.out ql/src/test/results/clientpositive/druid_timeseries.q.out
index 19a5af3a56..bb1d6f072f 100644
--- ql/src/test/results/clientpositive/druid_timeseries.q.out
+++ ql/src/test/results/clientpositive/druid_timeseries.q.out
@@ -15,53 +15,26 @@ PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` >= '2010-01-01 00:00:00 UTC' AND `__time` <= '2012-03-01 00:00:00 UTC' OR added <= 0
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: druid_table_1
-            filterExpr: (((__time >= TIMESTAMPLOCALTZ'2009-12-31 16:00:00.0 US/Pacific') and (__time <= TIMESTAMPLOCALTZ'2012-02-29 16:00:00.0 US/Pacific')) or (added <= 0)) (type: boolean)
-            properties:
-              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
-              druid.query.type select
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            Filter Operator
-              predicate: (((__time >= TIMESTAMPLOCALTZ'2009-12-31 16:00:00.0 US/Pacific') and (__time <= TIMESTAMPLOCALTZ'2012-02-29 16:00:00.0 US/Pacific')) or (added <= 0)) (type: boolean)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              Select Operator
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
-                  Reduce Output Operator
-                    sort order:
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
-                    value expressions: _col0 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        ListSink
+        TableScan
+          alias: druid_table_1
+          properties:
+            druid.fieldNames $f0
+            druid.fieldTypes bigint
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"2009-12-31T16:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"bound","dimension":"__time","upper":"2012-02-29T16:00:00.000Z","upperStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}}]},{"type":"bound","dimension":"added","upper":"0.0","upperStrict":false,"ordering":"numeric"}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}}
+            druid.query.type timeseries
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: $f0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            ListSink
 PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00 UTC'
 PREHOOK: type: QUERY
@@ -78,7 +51,9 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z"],"context":{"skipEmptyBuckets":true}}
+            druid.fieldNames $f0
+            druid.fieldTypes bigint
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z"],"context":{"skipEmptyBuckets":false}}
             druid.query.type timeseries
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
@@ -106,11 +81,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
+            druid.fieldNames $f0,$f1
+            druid.fieldTypes float,double
             druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"doubleMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
             druid.query.type timeseries
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: $f0 (type: float), $f1 (type: float)
+            expressions: $f0 (type: float), $f1 (type: double)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -136,11 +113,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
+            druid.fieldNames extract,$f1,$f2
+            druid.fieldTypes timestamp with local time zone,float,double
             druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
             druid.query.type groupBy
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: extract (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float)
+            expressions: extract (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -166,11 +145,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"year","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+            druid.fieldNames timestamp,$f1,$f2
+            druid.fieldTypes timestamp with local time zone,float,double
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
             druid.query.type timeseries
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float)
+            expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -186,24 +167,57 @@ FROM druid_table_1
 GROUP BY floor_quarter(`__time`)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            properties:
+              druid.fieldNames vc,added,variation
+              druid.fieldTypes timestamp with local time zone,float,float
+              druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","added","variation"],"resultFormat":"compactedList"}
+              druid.query.type scan
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: floor_second(vc) (type: timestamp with local time zone), added (type: float), variation (type: float)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: max(_col1), sum(_col2)
+                keys: _col0 (type: timestamp with local time zone)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: timestamp with local time zone)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: timestamp with local time zone)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col1 (type: float), _col2 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), sum(VALUE._col1)
+          keys: KEY._col0 (type: timestamp with local time zone)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: druid_table_1
-          properties:
-            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"quarter","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
-            druid.query.type timeseries
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-          Select Operator
-            expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            ListSink
+        ListSink
 PREHOOK: query: EXPLAIN
 SELECT floor_month(`__time`), max(added), sum(variation)
@@ -226,11 +240,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"month","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+            druid.fieldNames timestamp,$f1,$f2
+            druid.fieldTypes timestamp with local time zone,float,double
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
             druid.query.type timeseries
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float)
+            expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -256,11 +272,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"week","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+            druid.fieldNames timestamp,$f1,$f2
+            druid.fieldTypes timestamp with local time zone,float,double
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P1W","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
             druid.query.type timeseries
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float)
+            expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -286,11 +304,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"day","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+            druid.fieldNames timestamp,$f1,$f2
+            druid.fieldTypes timestamp with local time zone,float,double
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
             druid.query.type timeseries
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float)
+            expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -316,11 +336,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"hour","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+            druid.fieldNames timestamp,$f1,$f2
+            druid.fieldTypes timestamp with local time zone,float,double
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
             druid.query.type timeseries
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float)
+            expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             ListSink
@@ -346,11 +368,13 @@ STAGE PLANS:
         TableScan
          alias: druid_table_1
          properties:
-            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"minute","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+            druid.fieldNames timestamp,$f1,$f2
+            druid.fieldTypes timestamp with local time zone,float,double
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1M","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
             druid.query.type
timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float) + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -376,11 +400,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"second","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1S","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float) + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -408,11 +434,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"hour","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float) + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -434,59 +462,26 @@ WHERE floor_hour(`__time`) GROUP BY floor_hour(`__time`) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - 
TableScan - alias: druid_table_1 - filterExpr: floor_hour(__time) BETWEEN TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2014-01-01 00:00:00.0 US/Pacific' (type: boolean) - properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":["added","variation"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: floor_hour(__time) BETWEEN TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2014-01-01 00:00:00.0 US/Pacific' (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: floor_hour(__time) (type: timestamp with local time zone), added (type: float), variation (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: max(_col1), sum(_col2) - keys: _col0 (type: timestamp with local time zone) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time zone) - sort order: + - Map-reduce partition columns: _col0 (type: timestamp with local time zone) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: timestamp with local time zone) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["2010-01-01T08:00:00.000Z/2014-01-01T08:00:00.001Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT subq.h, subq.m, subq.s @@ -511,59 +506,26 @@ WHERE subq.h BETWEEN CAST('2010-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZON AND CAST('2014-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) POSTHOOK: type: QUERY STAGE 
DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - filterExpr: floor_hour(__time) BETWEEN TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2014-01-01 00:00:00.0 US/Pacific' (type: boolean) - properties: - druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":["added","variation"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: floor_hour(__time) BETWEEN TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2014-01-01 00:00:00.0 US/Pacific' (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: floor_hour(__time) (type: timestamp with local time zone), added (type: float), variation (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: max(_col1), sum(_col2) - keys: _col0 (type: timestamp with local time zone) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time zone) - sort order: + - Map-reduce partition columns: _col0 (type: timestamp with local time zone) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: timestamp with local time zone) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["2010-01-01T08:00:00.000Z/2014-01-01T08:00:00.001Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 PREHOOK: type: 
QUERY @@ -580,7 +542,9 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -604,7 +568,9 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -628,7 +594,9 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["2010-01-01T08:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["2010-01-01T08:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -652,7 +620,9 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2012-03-01T08:00:00.001Z"],"context":{"skipEmptyBuckets":true}} + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2012-03-01T08:00:00.001Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/druid_topn.q.out ql/src/test/results/clientpositive/druid_topn.q.out index 7a4c1f9f56..f5400a0f09 100644 --- ql/src/test/results/clientpositive/druid_topn.q.out +++ ql/src/test/results/clientpositive/druid_topn.q.out @@ -84,11 +84,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f1","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot,$f1,$f2 + druid.fieldTypes string,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f1","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: robot (type: string), $f1 (type: float), $f2 (type: float) + expressions: robot (type: string), $f1 (type: float), $f2 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -118,11 +120,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"robot"}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames extract,robot,$f2,$f3 + druid.fieldTypes timestamp with local time zone,string,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: robot (type: string), extract (type: timestamp with local time zone), $f2 (type: float), $f3 (type: float) + expressions: robot (type: string), extract (type: timestamp with local time zone), $f2 (type: float), $f3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -152,11 +156,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":"year","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot,floor_year,$f2,$f3 + druid.fieldTypes string,timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: robot (type: string), floor_year (type: timestamp with local time zone), $f2 (type: float), $f3 (type: float) + expressions: robot (type: string), floor_year (type: timestamp with local time zone), $f2 (type: float), $f3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -186,11 +192,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":"month","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot,floor_month,$f2,$f3 + druid.fieldTypes string,timestamp with local time zone,float,double + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f2 (type: float), $f3 (type: float) + expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f2 (type: float), $f3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -220,11 +228,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"default","dimension":"namespace"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":"month","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot,namespace,floor_month,$f3,$f4 + druid.fieldTypes string,string,timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"namespace","outputName":"namespace","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f3 (type: float), $f4 (type: float) + expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f3 (type: float), $f4 (type: double) 
outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -254,11 +264,13 @@ STAGE PLANS: TableScan alias: druid_table_1 properties: - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot"},{"type":"default","dimension":"namespace"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":"month","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"alphanumeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.fieldNames robot,namespace,floor_month,$f3,$f4 + druid.fieldTypes string,string,timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"namespace","outputName":"namespace","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f3 (type: float), $f4 (type: float) + expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f3 (type: float), $f4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink @@ -280,54 +292,26 @@ ORDER BY s LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - properties: - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"year","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} - druid.query.type timeseries - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp with local time zone), $f1_0 (type: float), $f2 (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 
1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: float) - sort order: + - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: timestamp with local time zone), _col1 (type: float) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: float), KEY.reducesinkkey0 (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: '1' (type: string), _col0 (type: timestamp with local time zone), _col1 (type: float), _col2 (type: float) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames floor_year,$f1_0,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f2","direction":"ascending","dimensionOrder":"numeric"}]},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: '1' (type: string), floor_year (type: timestamp with local time zone), $f1_0 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT robot, floor_hour(`__time`), max(added) as m, sum(variation) @@ -350,83 +334,24 @@ ORDER BY m LIMIT 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - filterExpr: floor_hour(__time) BETWEEN TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2014-01-01 00:00:00.0 US/Pacific' (type: boolean) - properties: - druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot"],"metrics":["added","variation"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: floor_hour(__time) BETWEEN TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2014-01-01 00:00:00.0 US/Pacific' (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: robot (type: string), floor_hour(__time) (type: timestamp with local time zone), added (type: float), variation (type: float) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: max(_col2), sum(_col3) - keys: _col0 (type: string), _col1 (type: timestamp with local time zone) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: timestamp with local time zone) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: timestamp with local time zone) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: float), _col3 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: timestamp with local time zone) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: float) - sort order: + - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: timestamp with local time zone), _col3 (type: double) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp with local time zone), KEY.reducesinkkey0 (type: float), VALUE._col2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: 100 + limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames robot,floor_hour,$f2,$f3 + druid.fieldTypes string,timestamp with local time zone,float,double + 
druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_hour","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f2","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["2010-01-01T08:00:00.000Z/2014-01-01T08:00:00.001Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: robot (type: string), floor_hour (type: timestamp with local time zone), $f2 (type: float), $f3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out index 86e3c56627..617ae3b759 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out @@ -278,7 +278,7 @@ POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE POSTHOOK: Input: default@cmv_mat_view2 POSTHOOK: Output: default@cmv_mat_view2 -Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[8][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from ( (select a, c from cmv_basetable where a = 3) table1 @@ -309,15 +309,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.cmv_mat_view2 - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: decimal(10,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,2)) + Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: a (type: int), c (type: decimal(10,2)) Execution mode: llap LLAP IO: all inputs Map 3 @@ -325,17 +321,10 @@ STAGE PLANS: TableScan alias: cmv_basetable Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((a = 3) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: c (type: decimal(10,2)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: decimal(10,2)) + Reduce Output Operator 
+ sort order: + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: a (type: int), c (type: decimal(10,2)), d (type: int) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -347,19 +336,22 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 450 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col1 (type: decimal(10,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 450 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 450 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col5, _col7, _col8 + Statistics: Num rows: 10 Data size: 2370 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 3) and (_col8 = 3)) (type: boolean) + Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col0 (type: int), _col7 (type: decimal(10,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -367,7 +359,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[8][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from ( (select a, c from cmv_basetable where a = 3) table1 join diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out index 86e3c56627..617ae3b759 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out @@ -278,7 +278,7 @@ POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE POSTHOOK: Input: default@cmv_mat_view2 POSTHOOK: Output: default@cmv_mat_view2 -Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[8][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from ( (select a, c from cmv_basetable where a = 3) table1 @@ -309,15 +309,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.cmv_mat_view2 - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: decimal(10,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: 
NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,2)) + Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: a (type: int), c (type: decimal(10,2)) Execution mode: llap LLAP IO: all inputs Map 3 @@ -325,17 +321,10 @@ STAGE PLANS: TableScan alias: cmv_basetable Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((a = 3) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: c (type: decimal(10,2)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: decimal(10,2)) + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: a (type: int), c (type: decimal(10,2)), d (type: int) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -347,19 +336,22 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 450 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col1 (type: decimal(10,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 450 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 450 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col5, _col7, _col8 + Statistics: Num rows: 10 Data size: 2370 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 3) and (_col8 = 3)) (type: boolean) + Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col0 (type: int), _col7 (type: decimal(10,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -367,7 +359,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[8][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from ( (select a, c from cmv_basetable where a = 3) table1 join diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out 
ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 1023143f97..5b68505076 100644
--- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -1577,8 +1577,8 @@ STAGE PLANS:
                           selectExpressions: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 15:int, LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 16:bigint, LongColUnaryMinus(col 3:bigint) -> 17:bigint, DoubleColUnaryMinus(col 4:float) -> 14:float, LongColAddLongColumn(col 18:bigint, col 3:bigint)(children: LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 18:bigint) -> 19:bigint, DoubleColDivideDoubleColumn(col 5:double, col 5:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, LongColMultiplyLongColumn(col 18:bigint, col 22:bigint)(children: col 18:int, LongColUnaryMinus(col 3:bigint) -> 22:bigint) -> 23:bigint, DoubleColAddDoubleColumn(col 24:double, col 25:double)(children: DoubleColUnaryMinus(col 5:double) -> 24:double, CastLongToDouble(col 3:bigint) -> 25:double) -> 26:double, DecimalScalarDivideDecimalColumn(val -1.389, col 27:decimal(3,0))(children: CastLongToDecimal(col 0:tinyint) -> 27:decimal(3,0)) -> 28:decimal(8,7), DoubleColModuloDoubleColumn(col 24:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 24:double) -> 25:double, LongColUnaryMinus(col 1:smallint) -> 18:smallint, LongColAddLongColumn(col 1:int, col 22:int)(children: col 1:smallint, LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 22:int) -> 29:int
                     Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
-                      key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int)
-                      sort order: +++++++++++++++++++++++
+                      key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int)
+                      sort order: +++++++++++++++++++++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
                           native: true
@@ -1607,12 +1607,12 @@ STAGE PLANS:
                 vectorized: true
             Reduce Operator Tree:
               Select Operator
-                expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(8,7)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int)
+                expressions: KEY.reducesinkkey6 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: bigint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(8,7)), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: smallint), KEY.reducesinkkey20 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+                    projectedOutputColumnNums: [6, 1, 21, 2, 5, 3, 4, 7, 0, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
                 Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 75
diff --git ql/src/test/results/clientpositive/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/materialized_view_create_rewrite.q.out
index e6e559f860..4da3d0930f 100644
--- ql/src/test/results/clientpositive/materialized_view_create_rewrite.q.out
+++ ql/src/test/results/clientpositive/materialized_view_create_rewrite.q.out
@@ -266,7 +266,7 @@ POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite
 POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
 POSTHOOK: Input: default@cmv_mat_view2
 POSTHOOK: Output: default@cmv_mat_view2
-Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[4][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain
 select * from (
   (select a, c from cmv_basetable where a = 3) table1
@@ -292,28 +292,17 @@ STAGE PLANS:
           TableScan
             alias: default.cmv_mat_view2
             Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: c (type: decimal(10,2))
-              outputColumnNames: _col0
+            Reduce Output Operator
+              sort order: 
               Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                sort order: 
-                Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: decimal(10,2))
+              value expressions: a (type: int), c (type: decimal(10,2))
           TableScan
             alias: cmv_basetable
             Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((a = 3) and (d = 3)) (type: boolean)
-              Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c (type: decimal(10,2))
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: decimal(10,2))
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE
+              value expressions: a (type: int), c (type: decimal(10,2)), d (type: int)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -321,19 +310,22 @@ STAGE PLANS:
           keys:
             0 
            1 
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col1 (type: decimal(10,2))
-            outputColumnNames: _col0, _col1, _col2, _col3
+          outputColumnNames: _col0, _col1, _col5, _col7, _col8
+          Statistics: Num rows: 10 Data size: 3580 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: ((_col5 = 3) and (_col8 = 3)) (type: boolean)
             Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col0 (type: int), _col7 (type: decimal(10,2))
+              outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -341,7 +333,7 @@
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[4][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select * from (
   (select a, c from cmv_basetable where a = 3) table1
   join
diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index c04f200269..7aa7b00c0e 100644
--- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -1560,8 +1560,8 @@ STAGE PLANS:
                           selectExpressions: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 15:int, LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 16:bigint, LongColUnaryMinus(col 3:bigint) -> 17:bigint, DoubleColUnaryMinus(col 4:float) -> 14:float, LongColAddLongColumn(col 18:bigint, col 3:bigint)(children: LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 18:bigint) -> 19:bigint, DoubleColDivideDoubleColumn(col 5:double, col 5:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, LongColMultiplyLongColumn(col 18:bigint, col 22:bigint)(children: col 18:int, LongColUnaryMinus(col 3:bigint) -> 22:bigint) -> 23:bigint, DoubleColAddDoubleColumn(col 24:double, col 25:double)(children: DoubleColUnaryMinus(col 5:double) -> 24:double, CastLongToDouble(col 3:bigint) -> 25:double) -> 26:double, DecimalScalarDivideDecimalColumn(val -1.389, col 27:decimal(3,0))(children: CastLongToDecimal(col 0:tinyint) -> 27:decimal(3,0)) -> 28:decimal(8,7), DoubleColModuloDoubleColumn(col 24:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 24:double) -> 25:double, LongColUnaryMinus(col 1:smallint) -> 18:smallint, LongColAddLongColumn(col 1:int, col 22:int)(children: col 1:smallint, LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 22:int) -> 29:int
                     Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int)
-                      sort order: +++++++++++++++++++++++
+                      key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int)
+                      sort order: +++++++++++++++++++++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
                           native: true
@@ -1589,12 +1589,12 @@ STAGE PLANS:
                 vectorized: true
             Reduce Operator Tree:
               Select Operator
-                expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(8,7)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int)
+                expressions: KEY.reducesinkkey6 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: bigint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(8,7)), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: smallint), KEY.reducesinkkey20 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+                    projectedOutputColumnNums: [6, 1, 21, 2, 5, 3, 4, 7, 0, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
                 Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 75