From c2f9c0b4779a079293ae592349dbcee13be77e6c Mon Sep 17 00:00:00 2001 From: Nishant Date: Fri, 18 Jan 2019 10:26:45 -0800 Subject: [PATCH] [HIVE-20546] Update to Apache Druid 0.13.0-incubating --- druid-handler/pom.xml | 22 +- .../hadoop/hive/druid/DruidKafkaUtils.java | 43 +- .../hive/druid/DruidStorageHandler.java | 65 +- .../hive/druid/DruidStorageHandlerUtils.java | 164 +++-- .../hive/druid/io/DruidOutputFormat.java | 30 +- .../druid/io/DruidQueryBasedInputFormat.java | 18 +- .../hive/druid/io/DruidRecordWriter.java | 34 +- .../hadoop/hive/druid/json/AvroParseSpec.java | 10 +- .../druid/json/AvroStreamInputRowParser.java | 6 +- .../druid/json/KafkaSupervisorIOConfig.java | 2 +- .../druid/json/KafkaSupervisorReport.java | 28 +- .../hive/druid/json/KafkaSupervisorSpec.java | 11 +- .../json/KafkaSupervisorTuningConfig.java | 43 +- .../hive/druid/json/KafkaTuningConfig.java | 238 +++++-- .../druid/security/KerberosHttpClient.java | 8 +- .../druid/security/ResponseCookieHandler.java | 12 +- .../RetryIfUnauthorizedResponseHandler.java | 13 +- .../serde/DruidGroupByQueryRecordReader.java | 4 +- .../druid/serde/DruidQueryRecordReader.java | 16 +- .../serde/DruidScanQueryRecordReader.java | 2 +- .../serde/DruidSelectQueryRecordReader.java | 6 +- .../hadoop/hive/druid/serde/DruidSerDe.java | 10 +- .../DruidTimeseriesQueryRecordReader.java | 4 +- .../serde/DruidTopNQueryRecordReader.java | 6 +- .../hive/druid/DerbyConnectorTestUtility.java | 8 +- .../hadoop/hive/druid/QTestDruidSerDe.java | 4 +- .../hadoop/hive/druid/QTestDruidSerDe2.java | 4 +- .../hive/druid/TestDruidStorageHandler.java | 22 +- .../TestHiveDruidQueryBasedInputFormat.java | 3 +- .../hive/druid/serde/TestDruidSerDe.java | 18 +- .../hive/ql/io/TestDruidRecordWriter.java | 58 +- itests/qtest-druid/pom.xml | 14 +- .../apache/hive/druid/ForkingDruidNode.java | 4 +- .../resources/testconfiguration.properties | 2 + pom.xml | 12 +- .../druid/druid_timeseries.q.out | 640 ++++++++++++++++++ .../clientpositive/druid/druid_topn.q.out | 375 ++++++++++ .../druid/druidmini_expressions.q.out | 14 +- .../clientpositive/druid/druidmini_mv.q.out | 30 +- .../druid/druidmini_test1.q.out | 12 +- .../druid/druidmini_test_ts.q.out | 12 +- 41 files changed, 1609 insertions(+), 418 deletions(-) create mode 100644 ql/src/test/results/clientpositive/druid/druid_timeseries.q.out create mode 100644 ql/src/test/results/clientpositive/druid/druid_topn.q.out diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml index 5642ea818c..0454747ad8 100644 --- a/druid-handler/pom.xml +++ b/druid-handler/pom.xml @@ -91,7 +91,7 @@ ${joda.version} - io.druid + org.apache.druid druid-server ${druid.version} @@ -108,14 +108,14 @@ jackson-databind - io.druid + org.apache.druid druid-aws-common true - io.druid + org.apache.druid druid-processing ${druid.version} @@ -128,7 +128,7 @@ true - io.druid.extensions + org.apache.druid.extensions druid-hdfs-storage ${druid.version} @@ -155,7 +155,7 @@ - io.druid.extensions + org.apache.druid.extensions mysql-metadata-storage ${druid.version} @@ -166,7 +166,7 @@ - io.druid.extensions + org.apache.druid.extensions postgresql-metadata-storage ${druid.version} @@ -240,7 +240,7 @@ test - io.druid + org.apache.druid druid-indexing-hadoop ${druid.version} test @@ -291,8 +291,8 @@ false - io.druid - org.apache.hive.druid.io.druid + org.apache.druid + org.apache.hive.druid.org.apache.druid io.netty @@ -321,8 +321,8 @@ - io.druid:* - io.druid.extensions:* + org.apache.druid:* + org.apache.druid.extensions:* io.netty:* org.apache.calcite:* 
com.fasterxml.jackson.core:* diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java index c5dc1e8eba..b56d48aa4f 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java @@ -21,18 +21,19 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; -import io.druid.data.input.impl.CSVParseSpec; -import io.druid.data.input.impl.DelimitedParseSpec; -import io.druid.data.input.impl.DimensionsSpec; -import io.druid.data.input.impl.InputRowParser; -import io.druid.data.input.impl.JSONParseSpec; -import io.druid.data.input.impl.StringInputRowParser; -import io.druid.data.input.impl.TimestampSpec; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.FullResponseHandler; -import io.druid.java.util.http.client.response.FullResponseHolder; -import io.druid.segment.IndexSpec; -import io.druid.segment.indexing.DataSchema; +import org.apache.druid.data.input.impl.CSVParseSpec; +import org.apache.druid.data.input.impl.DelimitedParseSpec; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.JSONParseSpec; +import org.apache.druid.data.input.impl.StringInputRowParser; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.java.util.http.client.Request; +import org.apache.druid.java.util.http.client.response.FullResponseHandler; +import org.apache.druid.java.util.http.client.response.FullResponseHolder; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.hadoop.hive.druid.conf.DruidConstants; import org.apache.hadoop.hive.druid.json.AvroParseSpec; import org.apache.hadoop.hive.druid.json.AvroStreamInputRowParser; @@ -73,8 +74,13 @@ static KafkaSupervisorSpec createKafkaSupervisorSpec(Table table, return new KafkaSupervisorSpec(dataSchema, new KafkaSupervisorTuningConfig(DruidStorageHandlerUtils.getIntegerProperty(table, DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "maxRowsInMemory"), + DruidStorageHandlerUtils.getLongProperty(table, + DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "maxBytesInMemory"), + DruidStorageHandlerUtils.getIntegerProperty(table, DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "maxRowsPerSegment"), + DruidStorageHandlerUtils.getLongProperty(table, + DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "maxTotalRows"), DruidStorageHandlerUtils.getPeriodProperty(table, DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "intermediatePersistPeriod"), null, @@ -90,7 +96,8 @@ static KafkaSupervisorSpec createKafkaSupervisorSpec(Table table, DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "handoffConditionTimeout"), DruidStorageHandlerUtils.getBooleanProperty(table, DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "resetOffsetAutomatically"), - DruidStorageHandlerUtils.getIntegerProperty(table, + TmpFileSegmentWriteOutMediumFactory.instance(), + DruidStorageHandlerUtils.getIntegerProperty(table, DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "workerThreads"), DruidStorageHandlerUtils.getIntegerProperty(table, 
DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "chatThreads"), @@ -101,7 +108,15 @@ static KafkaSupervisorSpec createKafkaSupervisorSpec(Table table, DruidStorageHandlerUtils.getPeriodProperty(table, DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "shutdownTimeout"), DruidStorageHandlerUtils.getPeriodProperty(table, - DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "offsetFetchPeriod")), + DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "offsetFetchPeriod"), + DruidStorageHandlerUtils.getPeriodProperty(table, + DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "intermediateHandoffPeriod"), + DruidStorageHandlerUtils.getBooleanProperty(table, + DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "logParseExceptions"), + DruidStorageHandlerUtils.getIntegerProperty(table, + DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "maxParseExceptions"), + DruidStorageHandlerUtils.getIntegerProperty(table, + DruidConstants.DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "maxSavedParseExceptions")), new KafkaSupervisorIOConfig(kafkaTopic, // Mandatory Property DruidStorageHandlerUtils.getIntegerProperty(table, diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java index 94a3a27ad4..c3bf491414 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java @@ -27,36 +27,37 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; -import io.druid.data.input.impl.DimensionSchema; -import io.druid.data.input.impl.DimensionsSpec; -import io.druid.data.input.impl.InputRowParser; -import io.druid.data.input.impl.TimestampSpec; -import io.druid.java.util.common.Pair; -import io.druid.java.util.common.RetryUtils; -import io.druid.java.util.common.lifecycle.Lifecycle; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.HttpClientConfig; -import io.druid.java.util.http.client.HttpClientInit; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.FullResponseHandler; -import io.druid.java.util.http.client.response.FullResponseHolder; -import io.druid.metadata.MetadataStorageConnectorConfig; -import io.druid.metadata.MetadataStorageTablesConfig; -import io.druid.metadata.SQLMetadataConnector; -import io.druid.metadata.storage.derby.DerbyConnector; -import io.druid.metadata.storage.derby.DerbyMetadataStorage; -import io.druid.metadata.storage.mysql.MySQLConnector; -import io.druid.metadata.storage.mysql.MySQLConnectorConfig; -import io.druid.metadata.storage.postgresql.PostgreSQLConnector; -import io.druid.query.aggregation.AggregatorFactory; -import io.druid.segment.IndexSpec; -import io.druid.segment.indexing.DataSchema; -import io.druid.segment.indexing.granularity.GranularitySpec; -import io.druid.segment.loading.DataSegmentPusher; -import io.druid.segment.loading.SegmentLoadingException; -import io.druid.storage.hdfs.HdfsDataSegmentPusher; -import io.druid.storage.hdfs.HdfsDataSegmentPusherConfig; -import io.druid.timeline.DataSegment; +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.TimestampSpec; +import 
org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.RetryUtils; +import org.apache.druid.java.util.common.lifecycle.Lifecycle; +import org.apache.druid.java.util.http.client.HttpClient; +import org.apache.druid.java.util.http.client.HttpClientConfig; +import org.apache.druid.java.util.http.client.HttpClientInit; +import org.apache.druid.java.util.http.client.Request; +import org.apache.druid.java.util.http.client.response.FullResponseHandler; +import org.apache.druid.java.util.http.client.response.FullResponseHolder; +import org.apache.druid.metadata.MetadataStorageConnectorConfig; +import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.metadata.SQLMetadataConnector; +import org.apache.druid.metadata.storage.derby.DerbyConnector; +import org.apache.druid.metadata.storage.derby.DerbyMetadataStorage; +import org.apache.druid.metadata.storage.mysql.MySQLConnector; +import org.apache.druid.metadata.storage.mysql.MySQLConnectorConfig; +import org.apache.druid.metadata.storage.postgresql.PostgreSQLConnector; +import org.apache.druid.metadata.storage.postgresql.PostgreSQLConnectorConfig; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.loading.SegmentLoadingException; +import org.apache.druid.storage.hdfs.HdfsDataSegmentPusher; +import org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig; +import org.apache.druid.timeline.DataSegment; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -859,7 +860,9 @@ private SQLMetadataConnector buildConnector() { case "postgresql": connector = new PostgreSQLConnector(storageConnectorConfigSupplier, - Suppliers.ofInstance(getDruidMetadataStorageTablesConfig())); + Suppliers.ofInstance(getDruidMetadataStorageTablesConfig()), + new PostgreSQLConnectorConfig() + ); break; case "derby": diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java index 27c4577c9e..7cf9bc7d77 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java @@ -30,62 +30,63 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Ordering; -import io.druid.data.input.impl.DimensionSchema; -import io.druid.data.input.impl.StringDimensionSchema; -import io.druid.jackson.DefaultObjectMapper; -import io.druid.java.util.common.JodaUtils; -import io.druid.java.util.common.MapUtils; -import io.druid.java.util.common.Pair; -import io.druid.java.util.common.granularity.Granularity; -import io.druid.java.util.emitter.EmittingLogger; -import io.druid.java.util.emitter.core.NoopEmitter; -import io.druid.java.util.emitter.service.ServiceEmitter; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.FullResponseHandler; -import io.druid.java.util.http.client.response.FullResponseHolder; -import io.druid.java.util.http.client.response.InputStreamResponseHandler; -import 
io.druid.math.expr.ExprMacroTable; -import io.druid.metadata.MetadataStorageTablesConfig; -import io.druid.metadata.SQLMetadataConnector; -import io.druid.metadata.storage.mysql.MySQLConnector; -import io.druid.query.aggregation.AggregatorFactory; -import io.druid.query.aggregation.DoubleSumAggregatorFactory; -import io.druid.query.aggregation.FloatSumAggregatorFactory; -import io.druid.query.aggregation.LongSumAggregatorFactory; -import io.druid.query.expression.LikeExprMacro; -import io.druid.query.expression.RegexpExtractExprMacro; -import io.druid.query.expression.TimestampCeilExprMacro; -import io.druid.query.expression.TimestampExtractExprMacro; -import io.druid.query.expression.TimestampFloorExprMacro; -import io.druid.query.expression.TimestampFormatExprMacro; -import io.druid.query.expression.TimestampParseExprMacro; -import io.druid.query.expression.TimestampShiftExprMacro; -import io.druid.query.expression.TrimExprMacro; -import io.druid.query.scan.ScanQuery; -import io.druid.query.select.SelectQueryConfig; -import io.druid.query.spec.MultipleIntervalSegmentSpec; -import io.druid.segment.IndexIO; -import io.druid.segment.IndexMergerV9; -import io.druid.segment.IndexSpec; -import io.druid.segment.data.BitmapSerdeFactory; -import io.druid.segment.data.ConciseBitmapSerdeFactory; -import io.druid.segment.data.RoaringBitmapSerdeFactory; -import io.druid.segment.indexing.granularity.GranularitySpec; -import io.druid.segment.indexing.granularity.UniformGranularitySpec; -import io.druid.segment.loading.DataSegmentPusher; -import io.druid.segment.realtime.appenderator.SegmentIdentifier; -import io.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; -import io.druid.storage.hdfs.HdfsDataSegmentPusher; -import io.druid.storage.hdfs.HdfsDataSegmentPusherConfig; -import io.druid.timeline.DataSegment; -import io.druid.timeline.TimelineObjectHolder; -import io.druid.timeline.VersionedIntervalTimeline; -import io.druid.timeline.partition.LinearShardSpec; -import io.druid.timeline.partition.NoneShardSpec; -import io.druid.timeline.partition.NumberedShardSpec; -import io.druid.timeline.partition.PartitionChunk; -import io.druid.timeline.partition.ShardSpec; +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.JodaUtils; +import org.apache.druid.java.util.common.MapUtils; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.emitter.core.NoopEmitter; +import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.java.util.http.client.HttpClient; +import org.apache.druid.java.util.http.client.Request; +import org.apache.druid.java.util.http.client.response.FullResponseHandler; +import org.apache.druid.java.util.http.client.response.FullResponseHolder; +import org.apache.druid.java.util.http.client.response.InputStreamResponseHandler; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.metadata.SQLMetadataConnector; +import org.apache.druid.metadata.storage.mysql.MySQLConnector; +import org.apache.druid.query.DruidProcessingConfig; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; 
+import org.apache.druid.query.aggregation.FloatSumAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.expression.LikeExprMacro; +import org.apache.druid.query.expression.RegexpExtractExprMacro; +import org.apache.druid.query.expression.TimestampCeilExprMacro; +import org.apache.druid.query.expression.TimestampExtractExprMacro; +import org.apache.druid.query.expression.TimestampFloorExprMacro; +import org.apache.druid.query.expression.TimestampFormatExprMacro; +import org.apache.druid.query.expression.TimestampParseExprMacro; +import org.apache.druid.query.expression.TimestampShiftExprMacro; +import org.apache.druid.query.expression.TrimExprMacro; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.select.SelectQueryConfig; +import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.data.BitmapSerdeFactory; +import org.apache.druid.segment.data.ConciseBitmapSerdeFactory; +import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; +import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.realtime.appenderator.SegmentIdentifier; +import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; +import org.apache.druid.storage.hdfs.HdfsDataSegmentPusher; +import org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.TimelineObjectHolder; +import org.apache.druid.timeline.VersionedIntervalTimeline; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.apache.druid.timeline.partition.NoneShardSpec; +import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.apache.druid.timeline.partition.PartitionChunk; +import org.apache.druid.timeline.partition.ShardSpec; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -224,9 +225,11 @@ private DruidStorageHandlerUtils() { /** * Used by druid to perform IO on indexes. */ - public static final IndexIO - INDEX_IO = - new IndexIO(JSON_MAPPER, TmpFileSegmentWriteOutMediumFactory.instance(), () -> 0); + public static final IndexIO INDEX_IO = new IndexIO(JSON_MAPPER, new DruidProcessingConfig() { + @Override public String getFormatString() { + return "%s-%s"; + } + }); /** * Used by druid to merge indexes. @@ -244,11 +247,10 @@ private DruidStorageHandlerUtils() { * Method that creates a request for Druid query using SMILE format. * * @param address of the host target. - * @param query druid query. - * + * @param query druid query. * @return Request object to be submitted. 
*/ - public static Request createSmileRequest(String address, io.druid.query.Query query) { + public static Request createSmileRequest(String address, org.apache.druid.query.Query query) { try { return new Request(HttpMethod.POST, new URL(String.format("%s/druid/v2/", "http://" + address))).setContent( SMILE_MAPPER.writeValueAsBytes(query)).setHeader(HttpHeaders.Names.CONTENT_TYPE, SMILE_CONTENT_TYPE); @@ -265,11 +267,9 @@ public static Request createSmileRequest(String address, io.druid.query.Query qu * Method that submits a request to an Http address and retrieves the result. * The caller is responsible for closing the stream once it finishes consuming it. * - * @param client Http Client will be used to submit request. + * @param client Http Client will be used to submit request. * @param request Http request to be submitted. - * * @return response object. - * * @throws IOException in case of request IO error. */ public static InputStream submitRequest(HttpClient client, Request request) throws IOException { @@ -316,9 +316,7 @@ private static Request withUrl(Request old, URL url) { * the descriptor path will be * ../workingPath/task_id/{@link DruidStorageHandler#SEGMENTS_DESCRIPTOR_DIR_NAME}/*.json * @param conf hadoop conf to get the file system - * * @return List of DataSegments - * * @throws IOException can be for the case we did not produce data. */ public static List getCreatedSegments(Path taskDir, Configuration conf) throws IOException { @@ -339,7 +337,6 @@ private static Request withUrl(Request old, URL url) { * @param outputFS filesystem. * @param segment DataSegment object. * @param descriptorPath path. - * * @throws IOException in case any IO issues occur. */ public static void writeSegmentDescriptor(final FileSystem outputFS, @@ -362,7 +359,6 @@ public static void writeSegmentDescriptor(final FileSystem outputFS, /** * @param connector SQL metadata connector to the metadata storage * @param metadataStorageTablesConfig Table config - * * @return all the active data sources in the metadata storage */ static Collection getAllDataSourceNames(SQLMetadataConnector connector, @@ -382,7 +378,6 @@ public static void writeSegmentDescriptor(final FileSystem outputFS, * @param connector SQL connector to metadata * @param metadataStorageTablesConfig Tables configuration * @param dataSource Name of data source - * * @return true if the data source was successfully disabled false otherwise */ static boolean disableDataSource(SQLMetadataConnector connector, @@ -411,17 +406,15 @@ static boolean disableDataSource(SQLMetadataConnector connector, * Then moves segments to druid deep storage with updated metadata/version. * ALL IS DONE IN ONE TRANSACTION * - * @param connector DBI connector to commit + * @param connector DBI connector to commit * @param metadataStorageTablesConfig Druid metadata tables definitions - * @param dataSource Druid datasource name - * @param segments List of segments to move and commit to metadata - * @param overwrite if it is an insert overwrite - * @param conf Configuration - * @param dataSegmentPusher segment pusher - * + * @param dataSource Druid datasource name + * @param segments List of segments to move and commit to metadata + * @param overwrite if it is an insert overwrite + * @param conf Configuration + * @param dataSegmentPusher segment pusher * @return List of successfully published Druid segments. 
* This list has the updated versions and metadata about segments after move and timeline sorting - * * @throws CallbackFailedException in case the connector can not add the segment to the DB. */ @SuppressWarnings("unchecked") static List publishSegmentsAndCommit(final SQLMetadataConnector connector, @@ -551,7 +544,6 @@ private static void disableDataSourceWithHandle(Handle handle, * @param connector SQL connector to metadata * @param metadataStorageTablesConfig Tables configuration * @param dataSource Name of data source - * * @return List of all data segments part of the given data source */ static List getDataSegmentList(final SQLMetadataConnector connector, @@ -577,7 +569,6 @@ private static void disableDataSourceWithHandle(Handle handle, /** * @param connector SQL DBI connector. - * * @return streaming fetch size. */ private static int getStreamingFetchSize(SQLMetadataConnector connector) { @@ -588,9 +579,8 @@ private static int getStreamingFetchSize(SQLMetadataConnector connector) { } /** - * @param pushedSegment the pushed data segment object + * @param pushedSegment the pushed data segment object * @param segmentsDescriptorDir actual directory path for descriptors. - * * @return a sanitize file name */ public static Path makeSegmentDescriptorOutputPath(DataSegment pushedSegment, Path segmentsDescriptorDir) { @@ -673,12 +663,12 @@ static int getIntegerProperty(Table table, String propertyName, int defaultVal) @Nullable public static List getListProperty(Table table, String propertyName) { List rv = new ArrayList<>(); String values = getTableProperty(table, propertyName); - if(values == null) { + if (values == null) { return null; } String[] vals = values.trim().split(","); - for(String val : vals) { - if(org.apache.commons.lang.StringUtils.isNotBlank(val)) { + for (String val : vals) { + if (org.apache.commons.lang.StringUtils.isNotBlank(val)) { rv.add(val); } } @@ -867,8 +857,8 @@ public static IndexSpec getIndexSpec(Configuration jc) { case TIMESTAMP: // Granularity column String tColumnName = columnNames.get(i); - if (!tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME) && !tColumnName.equals( - DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) { + if (!tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME) + && !tColumnName.equals(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) { throw new IllegalArgumentException("Dimension " + tColumnName + " does not have STRING type: " diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java index f0f039a347..2ad6a7f8b0 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java @@ -19,19 +19,19 @@ package org.apache.hadoop.hive.druid.io; import com.fasterxml.jackson.core.type.TypeReference; -import io.druid.data.input.impl.DimensionSchema; -import io.druid.data.input.impl.DimensionsSpec; -import io.druid.data.input.impl.InputRowParser; -import io.druid.data.input.impl.MapInputRowParser; -import io.druid.data.input.impl.TimeAndDimsParseSpec; -import io.druid.data.input.impl.TimestampSpec; -import io.druid.java.util.common.Pair; -import io.druid.query.aggregation.AggregatorFactory; -import io.druid.segment.IndexSpec; -import io.druid.segment.indexing.DataSchema; -import io.druid.segment.indexing.RealtimeTuningConfig; -import io.druid.segment.indexing.granularity.GranularitySpec; -import 
io.druid.segment.realtime.plumber.CustomVersioningPolicy; +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.MapInputRowParser; +import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.RealtimeTuningConfig; +import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.realtime.plumber.CustomVersioningPolicy; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FileSystem; @@ -153,6 +153,7 @@ IndexSpec indexSpec = DruidStorageHandlerUtils.getIndexSpec(jc); RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(maxRowInMemory, + null, null, null, new File(basePersistDirectory, dataSource), @@ -167,7 +168,8 @@ true, null, 0L, - null + null, + null ); LOG.debug(String.format("running with Data schema [%s] ", dataSchema)); diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java index 6c1dbd3d80..82a1f11f3f 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java @@ -19,15 +19,15 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.Lists; -import io.druid.java.util.http.client.Request; -import io.druid.query.BaseQuery; -import io.druid.query.LocatedSegmentDescriptor; -import io.druid.query.Query; -import io.druid.query.SegmentDescriptor; -import io.druid.query.scan.ScanQuery; -import io.druid.query.select.PagingSpec; -import io.druid.query.select.SelectQuery; -import io.druid.query.spec.MultipleSpecificSegmentSpec; +import org.apache.druid.java.util.http.client.Request; +import org.apache.druid.query.BaseQuery; +import org.apache.druid.query.LocatedSegmentDescriptor; +import org.apache.druid.query.Query; +import org.apache.druid.query.SegmentDescriptor; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.select.PagingSpec; +import org.apache.druid.query.select.SelectQuery; +import org.apache.druid.query.spec.MultipleSpecificSegmentSpec; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java index 15be0c325e..8c3f18e616 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java @@ -23,23 +23,23 @@ import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import io.druid.data.input.Committer; -import io.druid.data.input.InputRow; -import io.druid.data.input.MapBasedInputRow; -import io.druid.java.util.common.DateTimes; -import io.druid.java.util.common.granularity.Granularity; -import 
io.druid.segment.indexing.DataSchema; -import io.druid.segment.indexing.RealtimeTuningConfig; -import io.druid.segment.loading.DataSegmentPusher; -import io.druid.segment.realtime.FireDepartmentMetrics; -import io.druid.segment.realtime.appenderator.Appenderator; -import io.druid.segment.realtime.appenderator.Appenderators; -import io.druid.segment.realtime.appenderator.SegmentIdentifier; -import io.druid.segment.realtime.appenderator.SegmentNotWritableException; -import io.druid.segment.realtime.appenderator.SegmentsAndMetadata; -import io.druid.segment.realtime.plumber.Committers; -import io.druid.timeline.DataSegment; -import io.druid.timeline.partition.LinearShardSpec; +import org.apache.druid.data.input.Committer; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.RealtimeTuningConfig; +import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.appenderator.Appenderator; +import org.apache.druid.segment.realtime.appenderator.Appenderators; +import org.apache.druid.segment.realtime.appenderator.SegmentIdentifier; +import org.apache.druid.segment.realtime.appenderator.SegmentNotWritableException; +import org.apache.druid.segment.realtime.appenderator.SegmentsAndMetadata; +import org.apache.druid.segment.realtime.plumber.Committers; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/AvroParseSpec.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/AvroParseSpec.java index 6cada1cb50..48d6cf2d6c 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/AvroParseSpec.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/AvroParseSpec.java @@ -21,11 +21,11 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; -import io.druid.data.input.impl.DimensionsSpec; -import io.druid.data.input.impl.ParseSpec; -import io.druid.data.input.impl.TimestampSpec; -import io.druid.java.util.common.parsers.JSONPathSpec; -import io.druid.java.util.common.parsers.Parser; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.ParseSpec; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.java.util.common.parsers.JSONPathSpec; +import org.apache.druid.java.util.common.parsers.Parser; import java.util.Objects; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/AvroStreamInputRowParser.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/AvroStreamInputRowParser.java index 9532e055d6..b689b63031 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/AvroStreamInputRowParser.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/AvroStreamInputRowParser.java @@ -20,9 +20,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; -import 
io.druid.data.input.ByteBufferInputRowParser; -import io.druid.data.input.InputRow; -import io.druid.data.input.impl.ParseSpec; +import org.apache.druid.data.input.ByteBufferInputRowParser; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.impl.ParseSpec; import javax.validation.constraints.NotNull; import java.nio.ByteBuffer; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java index fba591792d..9f5fc48e6e 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Optional; import com.google.common.base.Preconditions; -import io.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.StringUtils; import org.joda.time.Duration; import org.joda.time.Period; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorReport.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorReport.java index b08c206308..bca1c78968 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorReport.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorReport.java @@ -17,19 +17,17 @@ */ package org.apache.hadoop.hive.druid.json; -import io.druid.indexing.overlord.supervisor.SupervisorReport; - import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.Lists; - +import org.apache.druid.indexing.overlord.supervisor.SupervisorReport; import org.joda.time.DateTime; +import javax.annotation.Nullable; import java.util.List; import java.util.Map; - -import javax.annotation.Nullable; +import java.util.Objects; /** * This class is copied from druid source code @@ -149,7 +147,7 @@ @JsonCreator public KafkaSupervisorReport(@JsonProperty("id") String id, @JsonProperty("generationTime") DateTime generationTime, @JsonProperty("payload") KafkaSupervisorReportPayload payload) { - super(id, generationTime); + super(id, generationTime, payload); this.payload = payload; } @@ -183,4 +181,22 @@ public KafkaSupervisorReport(String dataSource, @Override public String toString() { return "{" + "id='" + getId() + '\'' + ", generationTime=" + getGenerationTime() + ", payload=" + payload + '}'; } + + @Override public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + KafkaSupervisorReport that = (KafkaSupervisorReport) o; + return Objects.equals(payload, that.payload); + } + + @Override public int hashCode() { + return Objects.hash(super.hashCode(), payload); + } } diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorSpec.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorSpec.java index 35a9950f63..d230832243 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorSpec.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorSpec.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.druid.json; -import io.druid.segment.indexing.DataSchema; 
+import org.apache.druid.segment.indexing.DataSchema; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -62,7 +62,14 @@ null, null, null, - null); + null, + null, + null, + null, + null, + null, + null, + null); this.ioConfig = Preconditions.checkNotNull(ioConfig, "ioConfig"); this.context = context; } diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorTuningConfig.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorTuningConfig.java index 7ea23913d2..b4d38b97f4 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorTuningConfig.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorTuningConfig.java @@ -18,15 +18,16 @@ package org.apache.hadoop.hive.druid.json; -import io.druid.segment.IndexSpec; - import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; - +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.indexing.TuningConfigs; +import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.joda.time.Duration; import org.joda.time.Period; +import javax.annotation.Nullable; import java.io.File; /** @@ -44,7 +45,9 @@ private final Duration offsetFetchPeriod; public KafkaSupervisorTuningConfig(@JsonProperty("maxRowsInMemory") Integer maxRowsInMemory, + @JsonProperty("maxBytesInMemory") Long maxBytesInMemory, @JsonProperty("maxRowsPerSegment") Integer maxRowsPerSegment, + @JsonProperty("maxTotalRows") Long maxTotalRows, @JsonProperty("intermediatePersistPeriod") Period intermediatePersistPeriod, @JsonProperty("basePersistDirectory") File basePersistDirectory, @JsonProperty("maxPendingPersists") Integer maxPendingPersists, @@ -53,26 +56,35 @@ public KafkaSupervisorTuningConfig(@JsonProperty("maxRowsInMemory") Integer maxR @JsonProperty("buildV9Directly") Boolean buildV9Directly, @JsonProperty("reportParseExceptions") Boolean reportParseExceptions, @JsonProperty("handoffConditionTimeout") Long handoffConditionTimeout, - // for backward compatibility @JsonProperty("resetOffsetAutomatically") Boolean resetOffsetAutomatically, + @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, @JsonProperty("workerThreads") Integer workerThreads, @JsonProperty("chatThreads") Integer chatThreads, @JsonProperty("chatRetries") Long chatRetries, @JsonProperty("httpTimeout") Period httpTimeout, @JsonProperty("shutdownTimeout") Period shutdownTimeout, - @JsonProperty("offsetFetchPeriod") Period offsetFetchPeriod) { + @JsonProperty("offsetFetchPeriod") Period offsetFetchPeriod, + @JsonProperty("intermediateHandoffPeriod") Period intermediateHandoffPeriod, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions) { super(maxRowsInMemory, + maxBytesInMemory, maxRowsPerSegment, + maxTotalRows, intermediatePersistPeriod, basePersistDirectory, maxPendingPersists, indexSpec, true, reportParseExceptions, - // Supervised kafka tasks should respect KafkaSupervisorIOConfig.completionTimeout instead of - // handoffConditionTimeout handoffConditionTimeout, - resetOffsetAutomatically); + resetOffsetAutomatically, + segmentWriteOutMediumFactory, + 
intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions); this.workerThreads = workerThreads; this.chatThreads = chatThreads; @@ -107,12 +119,15 @@ public KafkaSupervisorTuningConfig(@JsonProperty("maxRowsInMemory") Integer maxR } @Override public String toString() { - //noinspection deprecation return "KafkaSupervisorTuningConfig{" + "maxRowsInMemory=" + getMaxRowsInMemory() + ", maxRowsPerSegment=" + getMaxRowsPerSegment() + + ", maxTotalRows=" + + getMaxTotalRows() + + ", maxBytesInMemory=" + + TuningConfigs.getMaxBytesInMemoryOrDefault(getMaxBytesInMemory()) + ", intermediatePersistPeriod=" + getIntermediatePersistPeriod() + ", basePersistDirectory=" @@ -127,6 +142,8 @@ public KafkaSupervisorTuningConfig(@JsonProperty("maxRowsInMemory") Integer maxR + getHandoffConditionTimeout() + ", resetOffsetAutomatically=" + isResetOffsetAutomatically() + + ", segmentWriteOutMediumFactory=" + + getSegmentWriteOutMediumFactory() + ", workerThreads=" + workerThreads + ", chatThreads=" @@ -139,6 +156,14 @@ public KafkaSupervisorTuningConfig(@JsonProperty("maxRowsInMemory") Integer maxR + shutdownTimeout + ", offsetFetchPeriod=" + offsetFetchPeriod + + ", intermediateHandoffPeriod=" + + getIntermediateHandoffPeriod() + + ", logParseExceptions=" + + isLogParseExceptions() + + ", maxParseExceptions=" + + getMaxParseExceptions() + + ", maxSavedParseExceptions=" + + getMaxSavedParseExceptions() + '}'; } diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaTuningConfig.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaTuningConfig.java index 9a19da264f..353a6d0abe 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaTuningConfig.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaTuningConfig.java @@ -18,57 +18,76 @@ package org.apache.hadoop.hive.druid.json; -import io.druid.segment.IndexSpec; -import io.druid.segment.indexing.RealtimeTuningConfig; -import io.druid.segment.realtime.appenderator.AppenderatorConfig; - import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; - -import io.druid.segment.writeout.SegmentWriteOutMediumFactory; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.indexing.RealtimeTuningConfig; +import org.apache.druid.segment.indexing.TuningConfig; +import org.apache.druid.segment.realtime.appenderator.AppenderatorConfig; +import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.joda.time.Period; import javax.annotation.Nullable; import java.io.File; +import java.util.Objects; /** * This class is copied from druid source code * in order to avoid adding additional dependencies on druid-indexing-service. 
*/ -public class KafkaTuningConfig implements AppenderatorConfig { +public class KafkaTuningConfig implements TuningConfig, AppenderatorConfig { private static final int DEFAULT_MAX_ROWS_PER_SEGMENT = 5_000_000; private static final boolean DEFAULT_RESET_OFFSET_AUTOMATICALLY = false; private final int maxRowsInMemory; + private final long maxBytesInMemory; private final int maxRowsPerSegment; + @Nullable private final Long maxTotalRows; private final Period intermediatePersistPeriod; private final File basePersistDirectory; - private final int maxPendingPersists; + @Deprecated private final int maxPendingPersists; private final IndexSpec indexSpec; private final boolean reportParseExceptions; - private final long handoffConditionTimeout; + @Deprecated private final long handoffConditionTimeout; private final boolean resetOffsetAutomatically; + @Nullable private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; + private final Period intermediateHandoffPeriod; + + private final boolean logParseExceptions; + private final int maxParseExceptions; + private final int maxSavedParseExceptions; - @JsonCreator public KafkaTuningConfig(@JsonProperty("maxRowsInMemory") Integer maxRowsInMemory, - @JsonProperty("maxRowsPerSegment") Integer maxRowsPerSegment, - @JsonProperty("intermediatePersistPeriod") Period intermediatePersistPeriod, - @JsonProperty("basePersistDirectory") File basePersistDirectory, - @JsonProperty("maxPendingPersists") Integer maxPendingPersists, - @JsonProperty("indexSpec") IndexSpec indexSpec, + @JsonCreator public KafkaTuningConfig(@JsonProperty("maxRowsInMemory") @Nullable Integer maxRowsInMemory, + @JsonProperty("maxBytesInMemory") @Nullable Long maxBytesInMemory, + @JsonProperty("maxRowsPerSegment") @Nullable Integer maxRowsPerSegment, + @JsonProperty("maxTotalRows") @Nullable Long maxTotalRows, + @JsonProperty("intermediatePersistPeriod") @Nullable Period intermediatePersistPeriod, + @JsonProperty("basePersistDirectory") @Nullable File basePersistDirectory, + @JsonProperty("maxPendingPersists") @Nullable Integer maxPendingPersists, + @JsonProperty("indexSpec") @Nullable IndexSpec indexSpec, // This parameter is left for compatibility when reading existing configs, to be removed in Druid 0.12. 
- @SuppressWarnings("unused") @JsonProperty("buildV9Directly") Boolean buildV9Directly, - @JsonProperty("reportParseExceptions") Boolean reportParseExceptions, - @JsonProperty("handoffConditionTimeout") Long handoffConditionTimeout, - @JsonProperty("resetOffsetAutomatically") Boolean resetOffsetAutomatically) { + @JsonProperty("buildV9Directly") @Nullable Boolean buildV9Directly, + @Deprecated @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, + @JsonProperty("handoffConditionTimeout") @Nullable Long handoffConditionTimeout, + @JsonProperty("resetOffsetAutomatically") @Nullable Boolean resetOffsetAutomatically, + @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + @JsonProperty("intermediateHandoffPeriod") @Nullable Period intermediateHandoffPeriod, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions) { // Cannot be a static because default basePersistDirectory is unique per-instance final RealtimeTuningConfig defaults = RealtimeTuningConfig.makeDefaultTuningConfig(basePersistDirectory); this.maxRowsInMemory = maxRowsInMemory == null ? defaults.getMaxRowsInMemory() : maxRowsInMemory; this.maxRowsPerSegment = maxRowsPerSegment == null ? DEFAULT_MAX_ROWS_PER_SEGMENT : maxRowsPerSegment; + // initializing this to 0, it will be lazily initialized to a value + // @see server.src.main.java.org.apache.druid.segment.indexing.TuningConfigs#getMaxBytesInMemoryOrDefault(long) + this.maxBytesInMemory = maxBytesInMemory == null ? 0 : maxBytesInMemory; + this.maxTotalRows = maxTotalRows; this.intermediatePersistPeriod = intermediatePersistPeriod == null ? defaults.getIntermediatePersistPeriod() : intermediatePersistPeriod; this.basePersistDirectory = defaults.getBasePersistDirectory(); - this.maxPendingPersists = maxPendingPersists == null ? defaults.getMaxPendingPersists() : maxPendingPersists; + this.maxPendingPersists = 0; this.indexSpec = indexSpec == null ? defaults.getIndexSpec() : indexSpec; this.reportParseExceptions = reportParseExceptions == null ? defaults.isReportParseExceptions() : reportParseExceptions; @@ -76,16 +95,59 @@ handoffConditionTimeout == null ? defaults.getHandoffConditionTimeout() : handoffConditionTimeout; this.resetOffsetAutomatically = resetOffsetAutomatically == null ? DEFAULT_RESET_OFFSET_AUTOMATICALLY : resetOffsetAutomatically; + this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; + this.intermediateHandoffPeriod = + intermediateHandoffPeriod == null ? new Period().withDays(Integer.MAX_VALUE) : intermediateHandoffPeriod; + + if (this.reportParseExceptions) { + this.maxParseExceptions = 0; + this.maxSavedParseExceptions = maxSavedParseExceptions == null ? 0 : Math.min(1, maxSavedParseExceptions); + } else { + this.maxParseExceptions = + maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + this.maxSavedParseExceptions = + maxSavedParseExceptions == null ? TuningConfig.DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS : maxSavedParseExceptions; + } + this.logParseExceptions = + logParseExceptions == null ? 
TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; + } + + public static KafkaTuningConfig copyOf(KafkaTuningConfig config) { + return new KafkaTuningConfig(config.maxRowsInMemory, + config.maxBytesInMemory, + config.maxRowsPerSegment, + config.maxTotalRows, + config.intermediatePersistPeriod, + config.basePersistDirectory, + config.maxPendingPersists, + config.indexSpec, + true, + config.reportParseExceptions, + config.handoffConditionTimeout, + config.resetOffsetAutomatically, + config.segmentWriteOutMediumFactory, + config.intermediateHandoffPeriod, + config.logParseExceptions, + config.maxParseExceptions, + config.maxSavedParseExceptions); } @Override @JsonProperty public int getMaxRowsInMemory() { return maxRowsInMemory; } - @JsonProperty public int getMaxRowsPerSegment() { + @Override @JsonProperty public long getMaxBytesInMemory() { + return maxBytesInMemory; + } + + @Override @JsonProperty public int getMaxRowsPerSegment() { return maxRowsPerSegment; } + @JsonProperty @Override @Nullable public Long getMaxTotalRows() { + return maxTotalRows; + } + @Override @JsonProperty public Period getIntermediatePersistPeriod() { return intermediatePersistPeriod; } @@ -94,11 +156,7 @@ return basePersistDirectory; } - @Nullable @Override public SegmentWriteOutMediumFactory getSegmentWriteOutMediumFactory() { - return null; - } - - @Override @JsonProperty public int getMaxPendingPersists() { + @Override @JsonProperty @Deprecated public int getMaxPendingPersists() { return maxPendingPersists; } @@ -125,6 +183,46 @@ return resetOffsetAutomatically; } + @Override @JsonProperty @Nullable public SegmentWriteOutMediumFactory getSegmentWriteOutMediumFactory() { + return segmentWriteOutMediumFactory; + } + + @JsonProperty public Period getIntermediateHandoffPeriod() { + return intermediateHandoffPeriod; + } + + @JsonProperty public boolean isLogParseExceptions() { + return logParseExceptions; + } + + @JsonProperty public int getMaxParseExceptions() { + return maxParseExceptions; + } + + @JsonProperty public int getMaxSavedParseExceptions() { + return maxSavedParseExceptions; + } + + public KafkaTuningConfig withBasePersistDirectory(File dir) { + return new KafkaTuningConfig(maxRowsInMemory, + maxBytesInMemory, + maxRowsPerSegment, + maxTotalRows, + intermediatePersistPeriod, + dir, + maxPendingPersists, + indexSpec, + true, + reportParseExceptions, + handoffConditionTimeout, + resetOffsetAutomatically, + segmentWriteOutMediumFactory, + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions); + } + @Override public boolean equals(Object o) { if (this == o) { return true; @@ -132,52 +230,42 @@ if (o == null || getClass() != o.getClass()) { return false; } - KafkaTuningConfig that = (KafkaTuningConfig) o; - - if (maxRowsInMemory != that.maxRowsInMemory) { - return false; - } - if (maxRowsPerSegment != that.maxRowsPerSegment) { - return false; - } - if (maxPendingPersists != that.maxPendingPersists) { - return false; - } - if (reportParseExceptions != that.reportParseExceptions) { - return false; - } - if (handoffConditionTimeout != that.handoffConditionTimeout) { - return false; - } - if (resetOffsetAutomatically != that.resetOffsetAutomatically) { - return false; - } - if (intermediatePersistPeriod != null ? - !intermediatePersistPeriod.equals(that.intermediatePersistPeriod) : - that.intermediatePersistPeriod != null) { - return false; - } - if (basePersistDirectory != null ? 
- !basePersistDirectory.equals(that.basePersistDirectory) : - that.basePersistDirectory != null) { - return false; - } - return indexSpec != null ? indexSpec.equals(that.indexSpec) : that.indexSpec == null; - + return maxRowsInMemory == that.maxRowsInMemory + && maxRowsPerSegment == that.maxRowsPerSegment + && maxBytesInMemory == that.maxBytesInMemory + && Objects.equals(maxTotalRows, that.maxTotalRows) + && maxPendingPersists == that.maxPendingPersists + && reportParseExceptions == that.reportParseExceptions + && handoffConditionTimeout == that.handoffConditionTimeout + && resetOffsetAutomatically == that.resetOffsetAutomatically + && Objects.equals(intermediatePersistPeriod, that.intermediatePersistPeriod) + && Objects.equals(basePersistDirectory, that.basePersistDirectory) + && Objects.equals(indexSpec, that.indexSpec) + && Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory) + && Objects.equals(intermediateHandoffPeriod, that.intermediateHandoffPeriod) + && logParseExceptions == that.logParseExceptions + && maxParseExceptions == that.maxParseExceptions + && maxSavedParseExceptions == that.maxSavedParseExceptions; } @Override public int hashCode() { - int result = maxRowsInMemory; - result = 31 * result + maxRowsPerSegment; - result = 31 * result + (intermediatePersistPeriod != null ? intermediatePersistPeriod.hashCode() : 0); - result = 31 * result + (basePersistDirectory != null ? basePersistDirectory.hashCode() : 0); - result = 31 * result + maxPendingPersists; - result = 31 * result + (indexSpec != null ? indexSpec.hashCode() : 0); - result = 31 * result + (reportParseExceptions ? 1 : 0); - result = 31 * result + (int) (handoffConditionTimeout ^ (handoffConditionTimeout >>> 32)); - result = 31 * result + (resetOffsetAutomatically ? 
1 : 0); - return result; + return Objects.hash(maxRowsInMemory, + maxRowsPerSegment, + maxBytesInMemory, + maxTotalRows, + intermediatePersistPeriod, + basePersistDirectory, + maxPendingPersists, + indexSpec, + reportParseExceptions, + handoffConditionTimeout, + resetOffsetAutomatically, + segmentWriteOutMediumFactory, + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions); } @Override public String toString() { @@ -186,6 +274,10 @@ + maxRowsInMemory + ", maxRowsPerSegment=" + maxRowsPerSegment + + ", maxTotalRows=" + + maxTotalRows + + ", maxBytesInMemory=" + + maxBytesInMemory + ", intermediatePersistPeriod=" + intermediatePersistPeriod + ", basePersistDirectory=" @@ -200,6 +292,16 @@ + handoffConditionTimeout + ", resetOffsetAutomatically=" + resetOffsetAutomatically + + ", segmentWriteOutMediumFactory=" + + segmentWriteOutMediumFactory + + ", intermediateHandoffPeriod=" + + intermediateHandoffPeriod + + ", logParseExceptions=" + + logParseExceptions + + ", maxParseExceptions=" + + maxParseExceptions + + ", maxSavedParseExceptions=" + + maxSavedParseExceptions + '}'; } } diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/security/KerberosHttpClient.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/security/KerberosHttpClient.java index cfce3ea1df..fdbbfcc7d7 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/security/KerberosHttpClient.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/security/KerberosHttpClient.java @@ -21,10 +21,10 @@ import com.google.common.base.Throwables; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; -import io.druid.java.util.http.client.AbstractHttpClient; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.HttpResponseHandler; +import org.apache.druid.java.util.http.client.AbstractHttpClient; +import org.apache.druid.java.util.http.client.HttpClient; +import org.apache.druid.java.util.http.client.Request; +import org.apache.druid.java.util.http.client.response.HttpResponseHandler; import org.apache.hadoop.security.UserGroupInformation; import org.jboss.netty.handler.codec.http.HttpHeaders; import org.joda.time.Duration; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/security/ResponseCookieHandler.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/security/ResponseCookieHandler.java index a2525fb8af..223000e62a 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/security/ResponseCookieHandler.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/security/ResponseCookieHandler.java @@ -19,8 +19,8 @@ package org.apache.hadoop.hive.druid.security; import com.google.common.collect.Maps; -import io.druid.java.util.http.client.response.ClientResponse; -import io.druid.java.util.http.client.response.HttpResponseHandler; +import org.apache.druid.java.util.http.client.response.ClientResponse; +import org.apache.druid.java.util.http.client.response.HttpResponseHandler; import org.jboss.netty.handler.codec.http.HttpChunk; import org.jboss.netty.handler.codec.http.HttpHeaders; import org.jboss.netty.handler.codec.http.HttpResponse; @@ -49,11 +49,11 @@ this.delegate = delegate; } - @Override public ClientResponse handleResponse(HttpResponse httpResponse) { + @Override public ClientResponse handleResponse(HttpResponse httpResponse, TrafficCop trafficCop) { try { final 
HttpHeaders headers = httpResponse.headers(); manager.put(uri, Maps.asMap(headers.names(), headers::getAll)); - return delegate.handleResponse(httpResponse); + return delegate.handleResponse(httpResponse, trafficCop); } catch (IOException e) { LOG.error("Error while processing Cookies from header", e); throw new RuntimeException(e); @@ -61,8 +61,8 @@ } @Override public ClientResponse handleChunk(ClientResponse clientResponse, - HttpChunk httpChunk) { - return delegate.handleChunk(clientResponse, httpChunk); + HttpChunk httpChunk, long chunkNum) { + return delegate.handleChunk(clientResponse, httpChunk, chunkNum); } @Override public ClientResponse done(ClientResponse clientResponse) { diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/security/RetryIfUnauthorizedResponseHandler.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/security/RetryIfUnauthorizedResponseHandler.java index 7d6de9828b..d6702f5a1f 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/security/RetryIfUnauthorizedResponseHandler.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/security/RetryIfUnauthorizedResponseHandler.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hive.druid.security; -import io.druid.java.util.http.client.response.ClientResponse; -import io.druid.java.util.http.client.response.HttpResponseHandler; +import org.apache.druid.java.util.http.client.response.ClientResponse; +import org.apache.druid.java.util.http.client.response.HttpResponseHandler; import org.jboss.netty.handler.codec.http.HttpChunk; import org.jboss.netty.handler.codec.http.HttpResponse; import org.jboss.netty.handler.codec.http.HttpResponseStatus; @@ -42,7 +42,8 @@ public RetryIfUnauthorizedResponseHandler(HttpResponseHandler> handleResponse(HttpResponse httpResponse) { + @Override public ClientResponse> handleResponse(HttpResponse httpResponse, + TrafficCop trafficCop) { LOG.debug("UnauthorizedResponseHandler - Got response status {}", httpResponse.getStatus()); if (httpResponse.getStatus().equals(HttpResponseStatus.UNAUTHORIZED)) { // Drain the buffer @@ -50,20 +51,20 @@ public RetryIfUnauthorizedResponseHandler(HttpResponseHandler> handleChunk( ClientResponse> clientResponse, - HttpChunk httpChunk) { + HttpChunk httpChunk, long chunkNum) { if (clientResponse.getObj().shouldRetry()) { // Drain the buffer //noinspection ResultOfMethodCallIgnored httpChunk.getContent().toString(); return clientResponse; } else { - return wrap(httpResponseHandler.handleChunk(unwrap(clientResponse), httpChunk)); + return wrap(httpResponseHandler.handleChunk(unwrap(clientResponse), httpChunk, chunkNum)); } } diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java index 9efa6f6bf6..038b1f63b0 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java @@ -19,8 +19,8 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JavaType; -import io.druid.data.input.MapBasedRow; -import io.druid.data.input.Row; +import org.apache.druid.data.input.MapBasedRow; +import org.apache.druid.data.input.Row; import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils; import org.apache.hadoop.hive.druid.conf.DruidConstants; import org.apache.hadoop.io.NullWritable; diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java index daebdcfa2d..edbea22c54 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java @@ -24,14 +24,14 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; -import io.druid.java.util.common.IAE; -import io.druid.java.util.common.RE; -import io.druid.java.util.common.guava.CloseQuietly; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.InputStreamResponseHandler; -import io.druid.query.Query; -import io.druid.query.QueryInterruptedException; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.RE; +import org.apache.druid.java.util.common.guava.CloseQuietly; +import org.apache.druid.java.util.http.client.HttpClient; +import org.apache.druid.java.util.http.client.Request; +import org.apache.druid.java.util.http.client.response.InputStreamResponseHandler; +import org.apache.druid.query.Query; +import org.apache.druid.query.QueryInterruptedException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.druid.DruidStorageHandler; import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidScanQueryRecordReader.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidScanQueryRecordReader.java index 4d9d1a9680..5bbee637cd 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidScanQueryRecordReader.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidScanQueryRecordReader.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hive.druid.serde; -import io.druid.query.scan.ScanResultValue; +import org.apache.druid.query.scan.ScanResultValue; import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils; import org.apache.hadoop.io.NullWritable; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSelectQueryRecordReader.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSelectQueryRecordReader.java index 2c4c8f9910..0b88fe2588 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSelectQueryRecordReader.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSelectQueryRecordReader.java @@ -28,9 +28,9 @@ import com.fasterxml.jackson.core.type.TypeReference; -import io.druid.query.Result; -import io.druid.query.select.EventHolder; -import io.druid.query.select.SelectResultValue; +import org.apache.druid.query.Result; +import org.apache.druid.query.select.EventHolder; +import org.apache.druid.query.select.SelectResultValue; /** * Record reader for results for Druid SelectQuery. 
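Note on the KafkaTuningConfig equals/hashCode hunk earlier in this section: it replaces the field-by-field null checks and manual 31-multiplier accumulation with java.util.Objects helpers. A minimal standalone sketch of that idiom, using a hypothetical class rather than anything from this patch:

import java.util.Objects;

// Hypothetical config class; mirrors the equals/hashCode style adopted in the hunk above.
final class TuningSketch {
  private final int maxRowsInMemory;
  private final Long maxTotalRows;            // nullable, hence Objects.equals below
  private final String basePersistDirectory;  // nullable

  TuningSketch(int maxRowsInMemory, Long maxTotalRows, String basePersistDirectory) {
    this.maxRowsInMemory = maxRowsInMemory;
    this.maxTotalRows = maxTotalRows;
    this.basePersistDirectory = basePersistDirectory;
  }

  @Override public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    TuningSketch that = (TuningSketch) o;
    // Primitives compared with ==, nullable references with Objects.equals.
    return maxRowsInMemory == that.maxRowsInMemory
        && Objects.equals(maxTotalRows, that.maxTotalRows)
        && Objects.equals(basePersistDirectory, that.basePersistDirectory);
  }

  @Override public int hashCode() {
    // Objects.hash tolerates nulls and replaces the manual 31 * result accumulation.
    return Objects.hash(maxRowsInMemory, maxTotalRows, basePersistDirectory);
  }
}

Objects.hash boxes its varargs, which is acceptable for a tuning config that is compared only occasionally; the payoff is that every nullable field is handled uniformly.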
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java index 696a2469c4..cd21824c66 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java @@ -20,11 +20,11 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import io.druid.query.Druids; -import io.druid.query.Druids.SegmentMetadataQueryBuilder; -import io.druid.query.metadata.metadata.ColumnAnalysis; -import io.druid.query.metadata.metadata.SegmentAnalysis; -import io.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.Druids; +import org.apache.druid.query.Druids.SegmentMetadataQueryBuilder; +import org.apache.druid.query.metadata.metadata.ColumnAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTimeseriesQueryRecordReader.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTimeseriesQueryRecordReader.java index beb342b554..d8d261bfd3 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTimeseriesQueryRecordReader.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTimeseriesQueryRecordReader.java @@ -19,8 +19,8 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JavaType; -import io.druid.query.Result; -import io.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.query.Result; +import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils; import org.apache.hadoop.hive.druid.conf.DruidConstants; import org.apache.hadoop.io.NullWritable; diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java index 64833dc71f..743858b692 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java @@ -27,9 +27,9 @@ import com.fasterxml.jackson.core.type.TypeReference; -import io.druid.query.Result; -import io.druid.query.topn.DimensionAndMetricValueExtractor; -import io.druid.query.topn.TopNResultValue; +import org.apache.druid.query.Result; +import org.apache.druid.query.topn.DimensionAndMetricValueExtractor; +import org.apache.druid.query.topn.TopNResultValue; /** * Record reader for results for Druid TopNQuery. 
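Note on the response-handler hunks above (ResponseCookieHandler, RetryIfUnauthorizedResponseHandler): the Druid 0.13 callbacks carry extra arguments — handleResponse now receives a TrafficCop and handleChunk a chunk number — and the wrappers simply forward them to their delegates. A self-contained sketch of that delegation pattern; Handler and TrafficCop here are stand-ins, not Druid's actual HttpResponseHandler API:

// Self-contained illustration of forwarding the widened callback arguments to a wrapped handler.
public class DelegatingHandlerSketch {

  interface TrafficCop { }

  interface Handler<T> {
    T handleResponse(String response, TrafficCop trafficCop);
    T handleChunk(T intermediate, String chunk, long chunkNum);
    T done(T intermediate);
  }

  static final class Wrapping<T> implements Handler<T> {
    private final Handler<T> delegate;

    Wrapping(Handler<T> delegate) {
      this.delegate = delegate;
    }

    @Override public T handleResponse(String response, TrafficCop trafficCop) {
      // Side work (e.g. recording cookies) would happen here before delegating.
      return delegate.handleResponse(response, trafficCop);
    }

    @Override public T handleChunk(T intermediate, String chunk, long chunkNum) {
      return delegate.handleChunk(intermediate, chunk, chunkNum);
    }

    @Override public T done(T intermediate) {
      return delegate.done(intermediate);
    }
  }
}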
diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/DerbyConnectorTestUtility.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/DerbyConnectorTestUtility.java index da05b832d2..9119aa5e6b 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/druid/DerbyConnectorTestUtility.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/DerbyConnectorTestUtility.java @@ -19,10 +19,10 @@ package org.apache.hadoop.hive.druid; import com.google.common.base.Suppliers; -import io.druid.metadata.MetadataStorageConnectorConfig; -import io.druid.metadata.MetadataStorageTablesConfig; -import io.druid.metadata.storage.derby.DerbyConnector; -import io.druid.metadata.storage.derby.DerbyMetadataStorage; +import org.apache.druid.metadata.MetadataStorageConnectorConfig; +import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.metadata.storage.derby.DerbyConnector; +import org.apache.druid.metadata.storage.derby.DerbyMetadataStorage; import org.junit.Assert; import org.junit.rules.ExternalResource; import org.skife.jdbi.v2.DBI; diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe.java index 64d5b2ec3e..1764944ac0 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe.java @@ -25,8 +25,8 @@ import com.fasterxml.jackson.core.type.TypeReference; -import io.druid.query.metadata.metadata.SegmentAnalysis; -import io.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.metadata.metadata.SegmentAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; /** * Druid SerDe to be used in tests. diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java index 35808c9d29..55aa1c84b7 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java @@ -25,8 +25,8 @@ import com.fasterxml.jackson.core.type.TypeReference; -import io.druid.query.metadata.metadata.SegmentAnalysis; -import io.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.metadata.metadata.SegmentAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; /** * Druid SerDe to be used in tests. 
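Note on DerbyConnectorTestUtility above: judging from the org.junit.rules.ExternalResource import, it is a JUnit rule that stands up an embedded Derby metadata store around each test. The general shape of such a rule, sketched with a placeholder resource (names are illustrative, not from the patch):

import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExternalResource;

public class ExternalResourceRuleSketchTest {

  /** Placeholder for the embedded metadata store the real rule manages. */
  static final class FakeMetadataStore {
    boolean running;
  }

  /** Rule in the style of DerbyConnectorTestUtility: start before each test, tear down after. */
  static final class FakeMetadataStoreRule extends ExternalResource {
    final FakeMetadataStore store = new FakeMetadataStore();

    @Override protected void before() {
      store.running = true;   // the real rule would create the Druid segment tables here
    }

    @Override protected void after() {
      store.running = false;  // the real rule would drop the Derby tables here
    }
  }

  @Rule public final FakeMetadataStoreRule metadataStore = new FakeMetadataStoreRule();

  @Test public void storeIsAvailableWhileTestRuns() {
    Assert.assertTrue(metadataStore.store.running);
  }
}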
diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/TestDruidStorageHandler.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/TestDruidStorageHandler.java index 419f088601..0b2072c2e1 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/druid/TestDruidStorageHandler.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/TestDruidStorageHandler.java @@ -22,17 +22,17 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import io.druid.indexer.JobHelper; -import io.druid.indexer.SQLMetadataStorageUpdaterJobHandler; -import io.druid.metadata.MetadataStorageTablesConfig; -import io.druid.segment.loading.DataSegmentPusher; -import io.druid.segment.loading.SegmentLoadingException; -import io.druid.storage.hdfs.HdfsDataSegmentPusher; -import io.druid.storage.hdfs.HdfsDataSegmentPusherConfig; -import io.druid.timeline.DataSegment; -import io.druid.timeline.partition.LinearShardSpec; -import io.druid.timeline.partition.NoneShardSpec; -import io.druid.timeline.partition.ShardSpec; +import org.apache.druid.indexer.JobHelper; +import org.apache.druid.indexer.SQLMetadataStorageUpdaterJobHandler; +import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.loading.SegmentLoadingException; +import org.apache.druid.storage.hdfs.HdfsDataSegmentPusher; +import org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.apache.druid.timeline.partition.NoneShardSpec; +import org.apache.druid.timeline.partition.ShardSpec; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java index 42bde3583e..591cbe870f 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.druid.io.HiveDruidSplit; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import io.druid.query.Query; +import org.apache.druid.query.Query; import junit.framework.TestCase; /** @@ -54,6 +54,7 @@ + "\"granularity\":\"DAY\"," + "\"aggregations\":[]," + "\"postAggregations\":[]," + + "\"limit\":2147483647," + "\"context\":{\"queryId\":\"\"}}, [localhost:8082]}]"; private static final String diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java index 656dc25e6f..546eac6964 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java @@ -35,9 +35,9 @@ import java.util.Properties; import java.util.stream.Collectors; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.response.HttpResponseHandler; -import io.druid.query.scan.ScanResultValue; +import org.apache.druid.java.util.http.client.HttpClient; +import org.apache.druid.java.util.http.client.response.HttpResponseHandler; +import 
org.apache.druid.query.scan.ScanResultValue; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveChar; @@ -80,12 +80,12 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.SettableFuture; -import io.druid.data.input.Row; -import io.druid.query.Query; -import io.druid.query.Result; -import io.druid.query.select.SelectResultValue; -import io.druid.query.timeseries.TimeseriesResultValue; -import io.druid.query.topn.TopNResultValue; +import org.apache.druid.data.input.Row; +import org.apache.druid.query.Query; +import org.apache.druid.query.Result; +import org.apache.druid.query.select.SelectResultValue; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.query.topn.TopNResultValue; import org.junit.rules.ExpectedException; /** diff --git a/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java b/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java index 0df150dbd0..91b5f8bc29 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java @@ -23,33 +23,33 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.druid.data.input.Firehose; -import io.druid.data.input.InputRow; -import io.druid.data.input.impl.DimensionsSpec; -import io.druid.data.input.impl.InputRowParser; -import io.druid.data.input.impl.MapInputRowParser; -import io.druid.data.input.impl.StringDimensionSchema; -import io.druid.data.input.impl.TimeAndDimsParseSpec; -import io.druid.data.input.impl.TimestampSpec; -import io.druid.java.util.common.granularity.Granularities; -import io.druid.query.aggregation.AggregatorFactory; -import io.druid.query.aggregation.LongSumAggregatorFactory; -import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; -import io.druid.segment.IndexSpec; -import io.druid.segment.QueryableIndex; -import io.druid.segment.QueryableIndexStorageAdapter; -import io.druid.segment.data.RoaringBitmapSerdeFactory; -import io.druid.segment.indexing.DataSchema; -import io.druid.segment.indexing.RealtimeTuningConfig; -import io.druid.segment.indexing.granularity.UniformGranularitySpec; -import io.druid.segment.loading.DataSegmentPusher; -import io.druid.segment.loading.LocalDataSegmentPuller; -import io.druid.segment.loading.LocalDataSegmentPusher; -import io.druid.segment.loading.LocalDataSegmentPusherConfig; -import io.druid.segment.loading.SegmentLoadingException; -import io.druid.segment.realtime.firehose.IngestSegmentFirehose; -import io.druid.segment.realtime.firehose.WindowedStorageAdapter; -import io.druid.timeline.DataSegment; +import org.apache.druid.data.input.Firehose; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.MapInputRowParser; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import 
org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.RealtimeTuningConfig; +import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; +import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.loading.LocalDataSegmentPuller; +import org.apache.druid.segment.loading.LocalDataSegmentPusher; +import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; +import org.apache.druid.segment.loading.SegmentLoadingException; +import org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose; +import org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter; +import org.apache.druid.timeline.DataSegment; import org.apache.calcite.adapter.druid.DruidTable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -150,6 +150,7 @@ RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, + null, null, null, temporaryFolder.newFolder(), @@ -164,7 +165,8 @@ null, null, 0L, - null); + null, + null); LocalFileSystem localFileSystem = FileSystem.getLocal(config); DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() { @Override public File getStorageDirectory() { diff --git a/itests/qtest-druid/pom.xml b/itests/qtest-druid/pom.xml index 10ddfaa006..260e73d2b1 100644 --- a/itests/qtest-druid/pom.xml +++ b/itests/qtest-druid/pom.xml @@ -39,7 +39,7 @@ ../.. 4.0.0 1.19.3 - 9.3.19.v20170502 + 9.4.10.v20180503 10.11.1.1 16.0.1 4.1.0 @@ -47,7 +47,7 @@ - io.druid + org.apache.druid druid-server ${druid.version} @@ -66,7 +66,7 @@ - io.druid + org.apache.druid druid-services ${druid.version} @@ -90,7 +90,7 @@ ${druid.guava.version} - io.druid.extensions + org.apache.druid.extensions druid-hdfs-storage ${druid.version} @@ -105,12 +105,12 @@ - io.druid.extensions + org.apache.druid.extensions druid-kafka-indexing-service ${druid.version} - io.druid.extensions + org.apache.druid.extensions druid-avro-extensions ${druid.version} @@ -246,7 +246,7 @@ false - io.druid.cli.Main + org.apache.druid.cli.Main diff --git a/itests/qtest-druid/src/main/java/org/apache/hive/druid/ForkingDruidNode.java b/itests/qtest-druid/src/main/java/org/apache/hive/druid/ForkingDruidNode.java index 8234084965..9703faa4b6 100644 --- a/itests/qtest-druid/src/main/java/org/apache/hive/druid/ForkingDruidNode.java +++ b/itests/qtest-druid/src/main/java/org/apache/hive/druid/ForkingDruidNode.java @@ -58,7 +58,7 @@ private final List allowedPrefixes = Lists.newArrayList( "com.metamx", "druid", - "io.druid", + "org.apache.druid", "java.io.tmpdir", "hadoop" ); @@ -111,7 +111,7 @@ public ForkingDruidNode(String nodeType, } this.properties .forEach((key, value) -> command.add(String.format("-D%s=%s", key, value))); - command.addAll(Lists.newArrayList("io.druid.cli.Main", "server", getNodeType())); + command.addAll(Lists.newArrayList("org.apache.druid.cli.Main", "server", getNodeType())); processBuilder.command(command); log.info("Creating forking druid node with " + String.join(" ", processBuilder.command())); } diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 18e4f7f02c..922aced39c 
100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1847,6 +1847,8 @@ druid.query.files=druidmini_test1.q,\ druidmini_mv.q,\ druid_timestamptz.q,\ druid_timestamptz2.q,\ + druid_topn.q,\ + druid_timeseries.q,\ druidmini_dynamic_partition.q,\ druidmini_expressions.q,\ druidmini_extractTime.q,\ diff --git a/pom.xml b/pom.xml index 9871bae60e..e7857018f1 100644 --- a/pom.xml +++ b/pom.xml @@ -145,7 +145,7 @@ 10.14.1.0 3.1.0 0.1.2 - 0.12.1 + 0.13.0-incubating 1.2.0-3f79e055 19.0 2.4.11 @@ -264,6 +264,16 @@ false + + druid-apache-rc-testing + https://repository.apache.org/content/repositories/orgapachedruid-1001/ + + true + + + true + + diff --git a/ql/src/test/results/clientpositive/druid/druid_timeseries.q.out b/ql/src/test/results/clientpositive/druid/druid_timeseries.q.out new file mode 100644 index 0000000000..9c0908da0b --- /dev/null +++ b/ql/src/test/results/clientpositive/druid/druid_timeseries.q.out @@ -0,0 +1,640 @@ +PREHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n3 +STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' +TBLPROPERTIES ("druid.datasource" = "wikipedia") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table_1_n3 +POSTHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n3 +STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' +TBLPROPERTIES ("druid.datasource" = "wikipedia") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_table_1_n3 +PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` >= '2010-01-01 00:00:00 UTC' AND `__time` <= '2012-03-01 00:00:00 UTC' OR added <= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` >= '2010-01-01 00:00:00 UTC' AND `__time` <= '2012-03-01 00:00:00 UTC' OR added <= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"2010-01-01T00:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"bound","dimension":"__time","upper":"2012-03-01T00:00:00.000Z","upperStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},{"type":"bound","dimension":"added","upper":"0.0","upperStrict":false,"ordering":"numeric"}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} + druid.query.type timeseries + Select Operator + expressions: $f0 (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` <= '2010-01-01 00:00:00 UTC' +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### 
+POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` <= '2010-01-01 00:00:00 UTC' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z"],"context":{"skipEmptyBuckets":false}} + druid.query.type timeseries + Select Operator + expressions: $f0 (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT max(added), sum(variation) +FROM druid_table_1_n3 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT max(added), sum(variation) +FROM druid_table_1_n3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames $f0,$f1 + druid.fieldTypes float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"doubleMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} + druid.query.type timeseries + Select Operator + expressions: $f0 (type: float), $f1 (type: double) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `__time`, max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY `__time` +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT `__time`, max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY `__time` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames extract,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: extract (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_year(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_year(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 
+PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_year(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_year(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_quarter(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_quarter(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_quarter(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_quarter(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P3M","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_month(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_month(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_month(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_month(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_week(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_week(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_week(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_week(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P1W","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_day(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_day(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_day(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_day(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_hour(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_hour(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### 
+POSTHOOK: query: EXPLAIN +SELECT floor_hour(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_hour(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_minute(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_minute(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_minute(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_minute(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1M","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_second(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_second(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_second(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +GROUP BY floor_second(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1S","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_hour(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +WHERE robot='1' +GROUP BY floor_hour(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_hour(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +WHERE robot='1' +GROUP BY floor_hour(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT floor_hour(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +WHERE floor_hour(`__time`) + BETWEEN CAST('2010-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) + AND CAST('2014-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) +GROUP BY floor_hour(`__time`) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT floor_hour(`__time`), max(added), sum(variation) +FROM druid_table_1_n3 +WHERE floor_hour(`__time`) + BETWEEN CAST('2010-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) + AND CAST('2014-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) +GROUP BY floor_hour(`__time`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["2010-01-01T08:00:00.000Z/2014-01-01T08:00:00.001Z"],"context":{"skipEmptyBuckets":true}} + 
druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT subq.h, subq.m, subq.s +FROM +( + SELECT floor_hour(`__time`) as h, max(added) as m, sum(variation) as s + FROM druid_table_1_n3 + GROUP BY floor_hour(`__time`) +) subq +WHERE subq.h BETWEEN CAST('2010-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) + AND CAST('2014-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT subq.h, subq.m, subq.s +FROM +( + SELECT floor_hour(`__time`) as h, max(added) as m, sum(variation) as s + FROM druid_table_1_n3 + GROUP BY floor_hour(`__time`) +) subq +WHERE subq.h BETWEEN CAST('2010-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) + AND CAST('2014-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames timestamp,$f1,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["2010-01-01T08:00:00.000Z/2014-01-01T08:00:00.001Z"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Select Operator + expressions: timestamp (type: timestamp with local time zone), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} + druid.query.type timeseries + Select Operator + expressions: $f0 (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` <= '2010-01-01 00:00:00 UTC' +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` <= '2010-01-01 00:00:00 UTC' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z"],"context":{"skipEmptyBuckets":false}} + druid.query.type timeseries + Select Operator + expressions: $f0 (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` >= '2010-01-01 00:00:00' +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` >= '2010-01-01 00:00:00' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["2010-01-01T08:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} + druid.query.type timeseries + Select Operator + expressions: $f0 (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` <= '2010-01-01 00:00:00' OR `__time` <= '2012-03-01 00:00:00' +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1_n3 where `__time` <= '2010-01-01 00:00:00' OR `__time` <= '2012-03-01 00:00:00' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n3 + properties: + druid.fieldNames $f0 + druid.fieldTypes bigint + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/2012-03-01T08:00:00.001Z"],"context":{"skipEmptyBuckets":false}} + druid.query.type timeseries + Select Operator + expressions: $f0 (type: bigint) + outputColumnNames: _col0 + ListSink + diff --git a/ql/src/test/results/clientpositive/druid/druid_topn.q.out b/ql/src/test/results/clientpositive/druid/druid_topn.q.out new file mode 100644 index 0000000000..cc8da8ec1e --- /dev/null +++ b/ql/src/test/results/clientpositive/druid/druid_topn.q.out @@ -0,0 +1,375 @@ +PREHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n1 +STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' +TBLPROPERTIES ("druid.datasource" = "wikipedia") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table_1_n1 +POSTHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n1 +STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler' +TBLPROPERTIES ("druid.datasource" = "wikipedia") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_table_1_n1 +PREHOOK: query: DESCRIBE FORMATTED druid_table_1_n1 +PREHOOK: 
type: DESCTABLE +PREHOOK: Input: default@druid_table_1_n1 +POSTHOOK: query: DESCRIBE FORMATTED druid_table_1_n1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@druid_table_1_n1 +# col_name data_type comment +__time timestamp with local time zone from deserializer +robot string from deserializer +namespace string from deserializer +anonymous string from deserializer +unpatrolled string from deserializer +page string from deserializer +language string from deserializer +newpage string from deserializer +user string from deserializer +count float from deserializer +added float from deserializer +delta float from deserializer +variation float from deserializer +deleted float from deserializer + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"__time\":\"true\",\"added\":\"true\",\"anonymous\":\"true\",\"count\":\"true\",\"deleted\":\"true\",\"delta\":\"true\",\"language\":\"true\",\"namespace\":\"true\",\"newpage\":\"true\",\"page\":\"true\",\"robot\":\"true\",\"unpatrolled\":\"true\",\"user\":\"true\",\"variation\":\"true\"}} + EXTERNAL TRUE + bucketing_version 2 + discover.partitions true + druid.datasource wikipedia + numFiles 0 + numRows 0 + rawDataSize 0 + storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.druid.QTestDruidSerDe +InputFormat: null +OutputFormat: null +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: EXPLAIN +SELECT robot, max(added) as m, sum(variation) +FROM druid_table_1_n1 +GROUP BY robot +ORDER BY m DESC +LIMIT 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT robot, max(added) as m, sum(variation) +FROM druid_table_1_n1 +GROUP BY robot +ORDER BY m DESC +LIMIT 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n1 + properties: + druid.fieldNames robot,$f1,$f2 + druid.fieldTypes string,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f1","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: robot (type: string), $f1 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: EXPLAIN +SELECT robot, `__time`, max(added), sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, `__time` +ORDER BY s DESC +LIMIT 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT robot, `__time`, max(added), sum(variation) as s +FROM druid_table_1_n1 
+GROUP BY robot, `__time` +ORDER BY s DESC +LIMIT 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n1 + properties: + druid.fieldNames extract,robot,$f2,$f3 + druid.fieldTypes timestamp with local time zone,string,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: robot (type: string), extract (type: timestamp with local time zone), $f2 (type: float), $f3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink + +PREHOOK: query: EXPLAIN +SELECT robot, floor_year(`__time`), max(added), sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, floor_year(`__time`) +ORDER BY s DESC +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT robot, floor_year(`__time`), max(added), sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, floor_year(`__time`) +ORDER BY s DESC +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n1 + properties: + druid.fieldNames robot,floor_year,$f2,$f3 + druid.fieldTypes string,timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: robot (type: string), floor_year (type: timestamp with local time zone), $f2 (type: float), $f3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink + +PREHOOK: query: EXPLAIN +SELECT robot, floor_month(`__time`), max(added), sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, floor_month(`__time`) +ORDER BY s +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT robot, 
floor_month(`__time`), max(added), sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, floor_month(`__time`) +ORDER BY s +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n1 + properties: + druid.fieldNames robot,floor_month,$f2,$f3 + druid.fieldTypes string,timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f2 (type: float), $f3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink + +PREHOOK: query: EXPLAIN +SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, namespace, floor_month(`__time`) +ORDER BY s DESC, m DESC +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, namespace, floor_month(`__time`) +ORDER BY s DESC, m DESC +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n1 + properties: + druid.fieldNames robot,namespace,floor_month,$f3,$f4 + druid.fieldTypes string,string,timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"namespace","outputName":"namespace","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f3 (type: float), $f4 
(type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink + +PREHOOK: query: EXPLAIN +SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, namespace, floor_month(`__time`) +ORDER BY robot ASC, m DESC +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT robot, floor_month(`__time`), max(added) as m, sum(variation) as s +FROM druid_table_1_n1 +GROUP BY robot, namespace, floor_month(`__time`) +ORDER BY robot ASC, m DESC +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n1 + properties: + druid.fieldNames robot,namespace,floor_month,$f3,$f4 + druid.fieldTypes string,string,timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"namespace","outputName":"namespace","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_month","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1M","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: robot (type: string), floor_month (type: timestamp with local time zone), $f3 (type: float), $f4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink + +PREHOOK: query: EXPLAIN +SELECT robot, floor_year(`__time`), max(added), sum(variation) as s +FROM druid_table_1_n1 +WHERE robot='1' +GROUP BY robot, floor_year(`__time`) +ORDER BY s +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT robot, floor_year(`__time`), max(added), sum(variation) as s +FROM druid_table_1_n1 +WHERE robot='1' +GROUP BY robot, floor_year(`__time`) +ORDER BY s +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n1 + properties: + druid.fieldNames floor_year,$f1_0,$f2 + druid.fieldTypes timestamp with local time zone,float,double + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"floor_year","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1Y","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f2","direction":"ascending","dimensionOrder":"numeric"}]},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"doubleMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Select Operator + expressions: '1' (type: string), floor_year (type: timestamp with local time zone), $f1_0 (type: float), $f2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink + +PREHOOK: query: EXPLAIN +SELECT robot, floor_hour(`__time`), max(added) as m, sum(variation) +FROM druid_table_1_n1 +WHERE floor_hour(`__time`) + BETWEEN CAST('2010-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) + AND CAST('2014-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) +GROUP BY robot, floor_hour(`__time`) +ORDER BY m +LIMIT 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_1_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT robot, floor_hour(`__time`), max(added) as m, sum(variation) +FROM druid_table_1_n1 +WHERE floor_hour(`__time`) + BETWEEN CAST('2010-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) + AND CAST('2014-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE) +GROUP BY robot, floor_hour(`__time`) +ORDER BY m +LIMIT 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_1_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_1_n1 + properties: + druid.fieldNames robot,floor_hour,$f2,$f3 + druid.fieldTypes string,timestamp with local time zone,float,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_hour","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"timeZone":"UTC","locale":"und"}}],"limitSpec":{"type":"default","limit":100,"columns":[{"dimension":"$f2","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["2010-01-01T08:00:00.000Z/2014-01-01T08:00:00.001Z"]} + druid.query.type groupBy + Select Operator + expressions: robot (type: string), floor_hour (type: timestamp with local time zone), $f2 (type: float), $f3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink + diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index 56065fffd4..fd4fb0557b 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -911,7 +911,7 @@ POSTHOOK: query: select count(DISTINCT cstring2), 
sum(cdouble) FROM druid_table_ POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -6078 2.7308662809692383E7 +6078 2.7308662793799996E7 PREHOOK: query: select count(distinct cstring2), sum(2 * cdouble) FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc @@ -920,7 +920,7 @@ POSTHOOK: query: select count(distinct cstring2), sum(2 * cdouble) FROM druid_ta POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -6078 5.4617325619384766E7 +6078 5.461732558759999E7 PREHOOK: query: select count(DISTINCT cstring2) FROM druid_table_alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc @@ -1010,7 +1010,7 @@ POSTHOOK: query: select count(DISTINCT cstring2), sum(cdouble) FROM druid_table_ POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -6078 2.7308662809692383E7 +6078 2.7308662793799996E7 PREHOOK: query: select count(distinct cstring2 || '_'|| cstring1), sum(cdouble), min(cint) FROM druid_table_alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc @@ -1019,7 +1019,7 @@ POSTHOOK: query: select count(distinct cstring2 || '_'|| cstring1), sum(cdouble) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -6095 2.7308662809692383E7 -1073279343 +6095 2.7308662793799996E7 -1073279343 PREHOOK: query: select count(*) from (select `__time` from druid_table_alltypesorc limit 1025) as src PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc @@ -1616,7 +1616,7 @@ GROUP BY CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DAT POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -3.832948323764436E14 1969-12-01 +3.83294831503382E14 1969-12-01 PREHOOK: query: explain SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1, DATE_SUB(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_2 from druid_table_alltypesorc order by date_1, date_2 limit 3 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc @@ -2003,7 +2003,7 @@ POSTHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_n1 POSTHOOK: Output: hdfs://### HDFS PATH ### -1970-01-01 1970-01-01 +NULL NULL 2004-04-04 2004-04-04 2004-04-09 2004-04-09 2004-04-11 2004-04-11 @@ -2361,7 +2361,7 @@ POSTHOOK: query: select max(cint * cdouble) from (select `cfloat`, `cstring1`, ` POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -8.256991051261554E15 +8.256991041892764E15 PREHOOK: query: explain select max(cint * cfloat) from (select `cfloat`, `cstring1`, `cint`, `cdouble` from druid_table_alltypesorc limit 90000) as src PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index 0c1ca76451..96690af8be 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -78,8 +78,8 @@ POSTHOOK: query: SELECT a, b, c FROM cmv_mat_view_n2 POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_mat_view_n2 
POSTHOOK: Output: hdfs://### HDFS PATH ### -2 bob 3.140000104904175 -2 bonnie 172342.203125 +2 bob 3.14 +2 bonnie 172342.2 PREHOOK: query: SHOW TBLPROPERTIES cmv_mat_view_n2 PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES cmv_mat_view_n2 @@ -125,9 +125,9 @@ POSTHOOK: query: SELECT a, c FROM cmv_mat_view2_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_mat_view2_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### -3 15.800000190734863 -3 9.800000190734863 -3 978.760009765625 +3 15.8 +3 9.8 +3 978.76 PREHOOK: query: SHOW TBLPROPERTIES cmv_mat_view2_n0 PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES cmv_mat_view2_n0 @@ -193,9 +193,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable_n2 POSTHOOK: Input: default@cmv_mat_view2_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### -3 15.800000190734863 -3 9.800000190734863 -3 978.760009765625 +3 15.8 +3 9.8 +3 978.76 Warning: Shuffle Join MERGEJOIN[10][tables = [cmv_mat_view2_n0, $hdt$_0]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM ( @@ -312,9 +312,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable_n2 POSTHOOK: Input: default@cmv_mat_view2_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### -3 15.800000190734863 3 978.76 -3 9.800000190734863 3 978.76 -3 978.760009765625 3 978.76 +3 15.8 3 978.76 +3 9.8 3 978.76 +3 978.76 3 978.76 PREHOOK: query: INSERT INTO cmv_basetable_n2 VALUES (cast(current_timestamp() AS timestamp), 3, 'charlie', 'charlie_c', 15.8, 1) PREHOOK: type: QUERY @@ -675,10 +675,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable_n2 POSTHOOK: Input: default@cmv_mat_view2_n0 POSTHOOK: Output: hdfs://### HDFS PATH ### -3 15.800000190734863 3 978.76 -3 15.800000190734863 3 978.76 -3 9.800000190734863 3 978.76 -3 978.760009765625 3 978.76 +3 15.8 3 978.76 +3 15.8 3 978.76 +3 9.8 3 978.76 +3 978.76 3 978.76 PREHOOK: query: DROP MATERIALIZED VIEW cmv_mat_view_n2 PREHOOK: type: DROP_MATERIALIZED_VIEW PREHOOK: Input: default@cmv_mat_view_n2 diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out index 7aa1763161..395a69be23 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out @@ -75,7 +75,7 @@ FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -1969-01-01 00:00:00.0 US/Pacific -39590.24724686146 2.7308662809692383E7 -39967 7781089 1408069801800 10992545287 +1969-01-01 00:00:00.0 US/Pacific -39590.24724686146 2.7308662793799996E7 -39967 7781089 1408069801800 10992545287 PREHOOK: query: EXPLAIN SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) PREHOOK: type: QUERY @@ -116,7 +116,7 @@ FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -1969-01-01 00:00:00.0 US/Pacific -1790.7781 -308691.84375 2 14255 -1073279343 -8577981133 +1969-01-01 00:00:00.0 US/Pacific -1790.7781 -308691.8399999999 2 14255 -1073279343 -8577981133 PREHOOK: query: EXPLAIN SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) 
PREHOOK: type: QUERY @@ -157,7 +157,7 @@ FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -1969-01-01 00:00:00.0 US/Pacific 769.16394 1.9565518E7 -45 -8101 1276572707 4923772860 +1969-01-01 00:00:00.0 US/Pacific 769.16394 1.9565517977799997E7 -45 -8101 1276572707 4923772860 PREHOOK: query: EXPLAIN SELECT cstring1, SUM(cdouble) as s FROM druid_table_alltypesorc GROUP BY cstring1 ORDER BY s ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc @@ -194,8 +194,8 @@ POSTHOOK: query: SELECT cstring1, SUM(cdouble) as s FROM druid_table_alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -1cGVWH7n1QU -596096.6875 -821UdmGbkEf4j -14161.827026367188 +1cGVWH7n1QU -596096.6799999999 +821UdmGbkEf4j -14161.82699999999 00iT08 0.0 02v8WnLuYDos3Cq 0.0 yv1js 0.0 @@ -240,7 +240,7 @@ POSTHOOK: query: SELECT cstring2, MAX(cdouble) FROM druid_table_alltypesorc GROU POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -NULL 1.9565518E7 +NULL 1.9565517977799997E7 0034fkcXMQI3 15601.0 004J8y 0.0 00GNm -200.0 diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out index 9c412d97dd..917e22a32a 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out @@ -17,7 +17,7 @@ FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -1969-01-01 00:00:00.0 US/Pacific -39590.24724686146 2.7308662809692383E7 -39967 7781089 1408069801800 10992545287 +1969-01-01 00:00:00.0 US/Pacific -39590.24724686146 2.7308662793799996E7 -39967 7781089 1408069801800 10992545287 PREHOOK: query: SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) PREHOOK: type: QUERY @@ -28,7 +28,7 @@ FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -1969-01-01 00:00:00.0 US/Pacific -1790.7781 -308691.84375 2 14255 -1073279343 -8577981133 +1969-01-01 00:00:00.0 US/Pacific -1790.7781 -308691.8399999999 2 14255 -1073279343 -8577981133 PREHOOK: query: SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) PREHOOK: type: QUERY @@ -39,7 +39,7 @@ FROM druid_table_alltypesorc GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -1969-01-01 00:00:00.0 US/Pacific 769.16394 1.9565518E7 -45 -8101 1276572707 4923772860 +1969-01-01 00:00:00.0 US/Pacific 769.16394 1.9565517977799997E7 -45 -8101 1276572707 4923772860 PREHOOK: query: SELECT cstring1, SUM(cdouble) as s FROM druid_table_alltypesorc GROUP BY cstring1 ORDER BY s ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table_alltypesorc @@ -48,8 +48,8 @@ POSTHOOK: query: SELECT cstring1, SUM(cdouble) as s FROM druid_table_alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: 
default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -1cGVWH7n1QU -596096.6875 -821UdmGbkEf4j -14161.827026367188 +1cGVWH7n1QU -596096.6799999999 +821UdmGbkEf4j -14161.82699999999 00iT08 0.0 02v8WnLuYDos3Cq 0.0 yv1js 0.0 @@ -66,7 +66,7 @@ POSTHOOK: query: SELECT cstring2, MAX(cdouble) FROM druid_table_alltypesorc GROU POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -NULL 1.9565518E7 +NULL 1.9565517977799997E7 0034fkcXMQI3 15601.0 004J8y 0.0 00GNm -200.0 -- 2.17.2 (Apple Git-113)
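
Note on the plans recorded above: Hive's floor_year/floor_month/floor_hour calls are pushed to Druid as a timeFormat extraction whose period granularity (P1Y, P1M, PT1H) is evaluated in US/Pacific while the output is rendered in UTC, which is why the local bound 2010-01-01 00:00 shows up as 2010-01-01T08:00:00.000Z in the query intervals. A minimal sketch of that flooring semantics in plain java.time, illustrative only and not code from the handler:

    import java.time.ZoneId;
    import java.time.ZonedDateTime;
    import java.time.format.DateTimeFormatter;
    import java.time.temporal.ChronoUnit;

    public class FloorYearSketch {
      public static void main(String[] args) {
        // Floor an instant to the start of its year in US/Pacific, then render it in UTC,
        // mirroring the period granularity (P1Y, timeZone US/Pacific) combined with the
        // timeFormat extraction (UTC output) seen in the generated druid.query.json.
        ZonedDateTime ts = ZonedDateTime.parse("2010-03-15T10:30:00-07:00[US/Pacific]");
        ZonedDateTime floorYear = ts.withDayOfYear(1).truncatedTo(ChronoUnit.DAYS);
        DateTimeFormatter utc = DateTimeFormatter
            .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
            .withZone(ZoneId.of("UTC"));
        System.out.println(utc.format(floorYear)); // prints 2010-01-01T08:00:00.000Z
      }
    }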
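Most of the .q.out changes from druidmini_expressions onward are numeric-precision updates (for example 2.7308662809692383E7 becoming 2.7308662793799996E7): the old expectations carry float rounding while the new ones keep full double precision, which is consistent with Druid 0.13 aggregating double metrics in double rather than float. A tiny illustration of that kind of drift, using made-up input values rather than the test data:

    public class DoubleVsFloatSum {
      public static void main(String[] args) {
        // Accumulate the same values in float vs. double precision; the totals differ
        // slightly, which is the shape of the diffs in the updated expectations.
        double[] values = {15601.0, -200.0, 9763215.5639, -308691.84, 1.9565517E7}; // hypothetical
        float floatSum = 0f;
        double doubleSum = 0d;
        for (double v : values) {
          floatSum += (float) v;   // float-precision accumulation (old golden files)
          doubleSum += v;          // double-precision accumulation (new golden files)
        }
        System.out.println("float  sum = " + floatSum);
        System.out.println("double sum = " + doubleSum);
      }
    }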
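The druid.query.json blobs recorded in these plans can also be replayed directly against a broker when double-checking new expectations by hand. A rough sketch follows; the localhost:8082 endpoint and the trimmed timeseries query are assumptions for illustration, not values taken from the test harness:

    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.nio.charset.StandardCharsets;

    public class ReplayDruidQuery {
      public static void main(String[] args) throws Exception {
        // Hypothetical broker address; the real host/port depend on the cluster setup.
        URL broker = new URL("http://localhost:8082/druid/v2/");
        String query = "{\"queryType\":\"timeseries\",\"dataSource\":\"wikipedia\","
            + "\"granularity\":\"all\",\"aggregations\":[{\"type\":\"doubleSum\","
            + "\"name\":\"s\",\"fieldName\":\"added\"}],"
            + "\"intervals\":[\"1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z\"]}";
        HttpURLConnection conn = (HttpURLConnection) broker.openConnection();
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json");
        conn.setDoOutput(true);
        try (OutputStream out = conn.getOutputStream()) {
          out.write(query.getBytes(StandardCharsets.UTF_8));
        }
        try (InputStream in = conn.getInputStream()) {
          System.out.println(new String(in.readAllBytes(), StandardCharsets.UTF_8));
        }
      }
    }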