diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
index 70c923d18b..6c1dbd3d80 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
@@ -45,6 +45,8 @@
 import org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader;
 import org.apache.hadoop.hive.druid.serde.DruidWritable;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.JobConf;
@@ -74,7 +76,7 @@
  * and parse the results.
  */
 public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidWritable>
-    implements org.apache.hadoop.mapred.InputFormat<NullWritable, DruidWritable> {
+    implements org.apache.hadoop.mapred.InputFormat<NullWritable, DruidWritable>, VectorizedInputFormatInterface {
 
   private static final Logger LOG = LoggerFactory.getLogger(DruidQueryBasedInputFormat.class);
 
@@ -289,6 +291,10 @@ private static String withQueryId(String druidQuery, String queryId) throws IOEx
     final String druidQueryType = job.get(Constants.DRUID_QUERY_TYPE, Query.SCAN);
     reader = getDruidQueryReader(druidQueryType);
     reader.initialize((HiveDruidSplit) split, job);
+    if (Utilities.getIsVectorized(job)) {
+      //noinspection unchecked
+      return (org.apache.hadoop.mapred.RecordReader) new DruidVectorizedWrapper(reader, job);
+    }
     return reader;
   }
 
@@ -302,4 +308,7 @@ private static String withQueryId(String druidQuery, String queryId) throws IOEx
     return getDruidQueryReader(druidQueryType);
   }
 
+  @Override public VectorizedSupport.Support[] getSupportedFeatures() {
+    return new VectorizedSupport.Support[0];
+  }
 }
diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidVectorizedWrapper.java druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidVectorizedWrapper.java
new file mode 100644
index 0000000000..c07c24d755
--- /dev/null
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidVectorizedWrapper.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.druid.io;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.druid.serde.DruidQueryRecordReader;
+import org.apache.hadoop.hive.druid.serde.DruidSerDe;
+import org.apache.hadoop.hive.druid.serde.DruidWritable;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.RecordReader;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Properties;
+
+/**
+ * A wrapper class that takes a row-by-row Druid record reader and exposes it as a vectorized one.
+ * @param <T> type of the Druid query.
+ */
+public class DruidVectorizedWrapper<T extends Comparable<T>> implements RecordReader<NullWritable, VectorizedRowBatch> {
+  private final VectorAssignRow vectorAssignRow = new VectorAssignRow();
+  private final DruidQueryRecordReader baseReader;
+  private final VectorizedRowBatchCtx rbCtx;
+  private final DruidSerDe serDe;
+
+  /**
+   * Actual projected columns needed by the query; this can be empty for queries like: select count(*) from src.
+   */
+  private final int[] projectedColumns;
+
+  private final DruidWritable druidWritable;
+
+  public DruidVectorizedWrapper(DruidQueryRecordReader reader, Configuration jobConf) {
+    this.rbCtx = Utilities.getVectorizedRowBatchCtx(jobConf);
+    if (rbCtx.getDataColumnNums() != null) {
+      projectedColumns = rbCtx.getDataColumnNums();
+    } else {
+      // all the columns are selected
+      projectedColumns = new int[rbCtx.getRowColumnTypeInfos().length];
+      for (int i = 0; i < projectedColumns.length; i++) {
+        projectedColumns[i] = i;
+      }
+    }
+    this.serDe = createAndInitializeSerde(jobConf);
+    this.baseReader = Preconditions.checkNotNull(reader);
+
+    // initialize the row assigner against the serde's object inspector
+    try {
+      vectorAssignRow.init((StructObjectInspector) serDe.getObjectInspector());
+    } catch (HiveException e) {
+      throw new RuntimeException(e);
+    }
+
+    druidWritable = baseReader.createValue();
+  }
+
+  @Override public boolean next(NullWritable nullWritable, VectorizedRowBatch vectorizedRowBatch) throws IOException {
+    vectorizedRowBatch.reset();
+    ArrayList<Object> row;
+    int rowsCount = 0;
+    while (rowsCount < vectorizedRowBatch.getMaxSize() && baseReader.next(nullWritable, druidWritable)) {
+      if (projectedColumns.length > 0) {
+        try {
+          row = serDe.deserializeAsPrimitive(druidWritable);
+        } catch (SerDeException e) {
+          throw new IOException(e);
+        }
+        for (int i : projectedColumns) {
+          vectorAssignRow.assignRowColumn(vectorizedRowBatch, rowsCount, i, row.get(i));
+        }
+      }
+      rowsCount++;
+    }
+    vectorizedRowBatch.size = rowsCount;
+    return rowsCount > 0;
+  }
+
+  @Override public NullWritable createKey() {
+    return NullWritable.get();
+  }
+
+  @Override public VectorizedRowBatch createValue() {
+    return rbCtx.createVectorizedRowBatch();
+  }
+
+  @Override public long getPos() throws IOException {
+    return baseReader.getPos();
+  }
+
+  @Override public void close() throws IOException {
+    baseReader.close();
+  }
+
+  @Override public float getProgress() throws IOException {
+    return baseReader.getProgress();
+  }
+
+  private static DruidSerDe createAndInitializeSerde(Configuration jobConf) {
+    DruidSerDe serDe = new DruidSerDe();
+    MapWork mapWork = Preconditions.checkNotNull(Utilities.getMapWork(jobConf), "Map work is null");
+    Properties
+        properties =
+        mapWork.getPartitionDescs()
+            .stream()
+            .map(partitionDesc -> partitionDesc.getTableDesc().getProperties())
+            .findAny()
+            .orElseThrow(() -> new RuntimeException("Cannot find table properties in the map work"));
+    try {
+      serDe.initialize(jobConf, properties, null);
+    } catch (SerDeException e) {
+      throw new RuntimeException("Cannot initialize the serde", e);
+    }
+    return serDe;
+  }
+}
diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
index 9bd84e8011..1f9580e311 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
@@ -479,7 +479,86 @@ protected SegmentAnalysis submitMetadataRequest(String address, SegmentMetadataQ
     return output;
   }
 
-  private long deserializeToMillis(Object value) {
+  /**
+   * Converts Druid primitive values to Hive primitives; the main usage is to pipe data into the vectorized row
+   * batch via VectorAssignRow. This has exactly the same logic as {@link DruidSerDe#deserialize(Writable)}, so any
+   * modification here should be mirrored there as well.
+   * The reason for keeping two functions is performance: merging them would add an extra check to the hot loop.
+   *
+   * @param writable Druid Writable container.
+   * @return ArrayList of Hive primitives.
+   */
+  public ArrayList<Object> deserializeAsPrimitive(Writable writable) throws SerDeException {
+    final DruidWritable input = (DruidWritable) writable;
+    final ArrayList<Object> output = Lists.newArrayListWithExpectedSize(columns.length);
+
+    for (int i = 0; i < columns.length; i++) {
+      final Object value = input.isCompacted() ?
input.getCompactedValue().get(i) : input.getValue().get(columns[i]); + if (value == null) { + output.add(null); + continue; + } + switch (types[i].getPrimitiveCategory()) { + case TIMESTAMP: + output.add(Timestamp.ofEpochMilli(deserializeToMillis(value))); + break; + case TIMESTAMPLOCALTZ: + output.add(new TimestampTZ(ZonedDateTime.ofInstant(Instant.ofEpochMilli(deserializeToMillis(value)), + ((TimestampLocalTZTypeInfo) types[i]).timeZone()))); + break; + case DATE: + output.add(Date.ofEpochMilli(deserializeToMillis(value))); + break; + case BYTE: + output.add(((Number) value).byteValue()); + break; + case SHORT: + output.add(((Number) value).shortValue()); + break; + case INT: + if (value instanceof Number) { + output.add(((Number) value).intValue()); + } else { + // This is a corner case where we have an extract of time unit like day/month pushed as Extraction Fn + //@TODO The best way to fix this is to add explicit output Druid types to Calcite Extraction Functions impls + output.add(Integer.valueOf((String) value)); + } + + break; + case LONG: + output.add(((Number) value).longValue()); + break; + case FLOAT: + output.add(((Number) value).floatValue()); + break; + case DOUBLE: + output.add(((Number) value).doubleValue()); + break; + case CHAR: + output.add(new HiveChar(value.toString(), ((CharTypeInfo) types[i]).getLength())); + break; + case VARCHAR: + output.add(new HiveVarchar(value.toString(), ((VarcharTypeInfo) types[i]).getLength())); + break; + case STRING: + output.add(value.toString()); + break; + case BOOLEAN: + if (value instanceof Number) { + output.add(((Number) value).intValue() != 0); + } else { + output.add(Boolean.valueOf(value.toString())); + } + break; + default: + throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory()); + } + } + + return output; + } + + private static long deserializeToMillis(Object value) { long numberOfMillis; if (value instanceof Number) { numberOfMillis = ((Number) value).longValue(); diff --git ql/src/test/queries/clientpositive/druidmini_expressions.q ql/src/test/queries/clientpositive/druidmini_expressions.q index ce511de50a..2f177108cf 100644 --- ql/src/test/queries/clientpositive/druidmini_expressions.q +++ ql/src/test/queries/clientpositive/druidmini_expressions.q @@ -65,6 +65,8 @@ select count(distinct cstring2), sum(2 * cdouble) FROM druid_table_alltypesorc G select count(DISTINCT cstring2) FROM druid_table_alltypesorc ; +explain select count(DISTINCT cstring1) FROM druid_table_alltypesorc ; + select count(DISTINCT cstring1) FROM druid_table_alltypesorc ; select count(DISTINCT cstring2), sum(cdouble) FROM druid_table_alltypesorc ; @@ -185,3 +187,16 @@ VALUES EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1; SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1; + +EXPLAIN select count(*) from (select `__time` from druid_table_alltypesorc limit 1025) as src; + +select count(*) from (select `__time` from druid_table_alltypesorc limit 1025) as src; + +-- No Vectorization since __time is timestamp with local time zone +explain select `timets` from (select `__time` as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets`; +-- Vectorization is on now since we cast to Timestamp +explain select `timets` from (select cast(`__time` as timestamp ) as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets`; + +select `timets_with_tz` from (select `__time` as timets_with_tz from druid_table_alltypesorc order by timets_with_tz limit 10) 
as src order by `timets_with_tz`; + +select `timets` from (select cast(`__time` as timestamp ) as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets`; diff --git ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index 0cd54fcf5a..47f663bbf6 100644 --- ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -930,6 +930,69 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### 6078 +PREHOOK: query: explain select count(DISTINCT cstring1) FROM druid_table_alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain select count(DISTINCT cstring1) FROM druid_table_alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table_alltypesorc + properties: + druid.fieldNames cstring1 + druid.fieldTypes string + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 9173 Data size: 1603744 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cstring1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select count(DISTINCT cstring1) FROM druid_table_alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc @@ -1945,3 +2008,267 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 2004-04-09 2004-04-09 2004-04-11 2004-04-11 2004-04-12 2004-04-12 +PREHOOK: query: EXPLAIN select count(*) from (select `__time` from druid_table_alltypesorc limit 1025) as src +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN select count(*) from (select `__time` from 
druid_table_alltypesorc limit 1025) as src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table_alltypesorc + properties: + druid.fieldNames vc + druid.fieldTypes int + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"0","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1025} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from (select `__time` from druid_table_alltypesorc limit 1025) as src +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from (select `__time` from druid_table_alltypesorc limit 1025) as src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +1025 +PREHOOK: query: explain select `timets` from (select `__time` as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets` +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain select `timets` from (select `__time` as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table_alltypesorc + properties: + druid.fieldNames vc + druid.fieldTypes timestamp with local 
time zone + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: vc (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp with local time zone) + sort order: + + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp with local time zone) + sort order: + + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select `timets` from (select cast(`__time` as timestamp ) as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets` +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain select `timets` from (select cast(`__time` as timestamp ) as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table_alltypesorc + properties: + druid.fieldNames vc + druid.fieldTypes timestamp + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + 
druid.query.type scan + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: vc (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select `timets_with_tz` from (select `__time` as timets_with_tz from druid_table_alltypesorc order by timets_with_tz limit 10) as src order by `timets_with_tz` +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select `timets_with_tz` from (select `__time` as timets_with_tz from druid_table_alltypesorc order by timets_with_tz limit 10) as src order by `timets_with_tz` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +1969-12-31 15:59:00.0 US/Pacific +PREHOOK: query: select `timets` from (select cast(`__time` as timestamp ) as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets` +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select `timets` from (select cast(`__time` as timestamp ) as timets from druid_table_alltypesorc order by timets limit 10) as src order by `timets` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00 +1969-12-31 15:59:00
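
For reference, the sketch below is not part of the patch; it only illustrates the batch-draining contract that DruidVectorizedWrapper establishes. When Utilities.getIsVectorized(job) is true, the reader returned by DruidQueryBasedInputFormat.getRecordReader yields NullWritable keys and VectorizedRowBatch values, filling at most batch.getMaxSize() rows per call and publishing the produced row count in batch.size. The class and method names (DruidBatchConsumerSketch, countRows) are hypothetical, and obtaining the split, JobConf and reader is elided.

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.RecordReader;

// Illustrative sketch only: drains a vectorized Druid reader and counts the rows it produced.
public final class DruidBatchConsumerSketch {

  static long countRows(RecordReader<NullWritable, VectorizedRowBatch> reader) throws IOException {
    NullWritable key = reader.createKey();
    // The batch shape and maximum size come from the VectorizedRowBatchCtx taken from the job configuration.
    VectorizedRowBatch batch = reader.createValue();
    long rows = 0;
    try {
      // DruidVectorizedWrapper.next() fills up to batch.getMaxSize() rows and sets batch.size before returning.
      while (reader.next(key, batch)) {
        rows += batch.size;
      }
    } finally {
      reader.close();
    }
    return rows;
  }
}

Note that for queries projecting no columns (for example count(*)), the wrapper still advances batch.size for every underlying row, so a consumer loop like the one above still observes the correct row count.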