diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
index 076f00af37..597761b4cd 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.druid;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.InjectableValues;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.jsontype.NamedType;
@@ -47,7 +48,6 @@
 import io.druid.metadata.MetadataStorageTablesConfig;
 import io.druid.metadata.SQLMetadataConnector;
 import io.druid.metadata.storage.mysql.MySQLConnector;
-import io.druid.query.Druids;
 import io.druid.query.aggregation.AggregatorFactory;
 import io.druid.query.aggregation.DoubleSumAggregatorFactory;
 import io.druid.query.aggregation.LongSumAggregatorFactory;
@@ -60,7 +60,7 @@
 import io.druid.query.expression.TimestampParseExprMacro;
 import io.druid.query.expression.TimestampShiftExprMacro;
 import io.druid.query.expression.TrimExprMacro;
-import io.druid.query.select.PagingSpec;
+import io.druid.query.scan.ScanQuery;
 import io.druid.query.select.SelectQueryConfig;
 import io.druid.query.spec.MultipleIntervalSegmentSpec;
 import io.druid.segment.IndexIO;
@@ -127,7 +127,6 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -635,19 +634,16 @@ public static Path makeSegmentDescriptorOutputPath(DataSegment pushedSegment,
     );
   }
 
-  public static String createSelectStarQuery(String dataSource) throws IOException {
-    // Create Select query
-    Druids.SelectQueryBuilder builder = new Druids.SelectQueryBuilder();
-    builder.dataSource(dataSource);
+  public static String createScanAllQuery(String dataSourceName) throws JsonProcessingException {
+    final ScanQuery.ScanQueryBuilder scanQueryBuilder = ScanQuery.newScanQueryBuilder();
     final List intervals = Arrays.asList(DEFAULT_INTERVAL);
-    builder.intervals(new MultipleIntervalSegmentSpec(intervals));
-    builder.pagingSpec(PagingSpec.newSpec(1));
-    Map context = new HashMap<>();
-    context.put(Constants.DRUID_QUERY_FETCH, false);
-    builder.context(context);
-    return JSON_MAPPER.writeValueAsString(builder.build());
+    ScanQuery scanQuery = scanQueryBuilder
+        .dataSource(dataSourceName)
+        .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST)
+        .intervals(new MultipleIntervalSegmentSpec(intervals))
+        .build();
+    return JSON_MAPPER.writeValueAsString(scanQuery);
   }
-
   /**
    * Simple interface for retry operations
    */
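Sketch (not part of the patch): the builder calls above are exactly what the new createScanAllQuery uses, so a standalone rendering of what it emits looks roughly like the following. The mapper and the all-time interval are hypothetical stand-ins for DruidStorageHandlerUtils.JSON_MAPPER and DEFAULT_INTERVAL, whose exact values the patch does not show; RESULT_FORMAT_COMPACTED_LIST asks Druid to return each row as a compact array instead of a column-to-value map.

    import com.fasterxml.jackson.databind.ObjectMapper;
    import io.druid.jackson.DefaultObjectMapper;
    import io.druid.query.scan.ScanQuery;
    import io.druid.query.spec.MultipleIntervalSegmentSpec;
    import org.joda.time.Interval;

    import java.util.Collections;

    public class ScanAllQuerySketch {
      public static void main(String[] args) throws Exception {
        // Stand-ins for DruidStorageHandlerUtils.JSON_MAPPER and DEFAULT_INTERVAL;
        // the real constants may be configured differently.
        ObjectMapper mapper = new DefaultObjectMapper();
        Interval allTime = Interval.parse("1900-01-01/3000-01-01");

        ScanQuery scanQuery = ScanQuery.newScanQueryBuilder()
            .dataSource("dummy_ds")
            .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST)
            .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(allTime)))
            .build();

        // Serializes to roughly:
        // {"queryType":"scan","dataSource":{"type":"table","name":"dummy_ds"},
        //  "intervals":{...},"resultFormat":"compactedList",...}
        System.out.println(mapper.writeValueAsString(scanQuery));
      }
    }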
diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
index c2d3fe5a49..4abe4b6ff3 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
@@ -17,13 +17,19 @@
  */
 package org.apache.hadoop.hive.druid.io;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URL;
-import java.net.URLEncoder;
-import java.util.Arrays;
-import java.util.List;
-
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.google.common.collect.Lists;
+import com.metamx.http.client.Request;
+import io.druid.query.BaseQuery;
+import io.druid.query.LocatedSegmentDescriptor;
+import io.druid.query.Query;
+import io.druid.query.SegmentDescriptor;
+import io.druid.query.scan.ScanQuery;
+import io.druid.query.select.PagingSpec;
+import io.druid.query.select.SelectQuery;
+import io.druid.query.spec.MultipleSpecificSegmentSpec;
 import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -54,20 +60,12 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.fasterxml.jackson.core.JsonParseException;
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.JsonMappingException;
-import com.google.common.collect.Lists;
-import com.metamx.http.client.Request;
-
-import io.druid.query.BaseQuery;
-import io.druid.query.LocatedSegmentDescriptor;
-import io.druid.query.Query;
-import io.druid.query.SegmentDescriptor;
-import io.druid.query.scan.ScanQuery;
-import io.druid.query.select.PagingSpec;
-import io.druid.query.select.SelectQuery;
-import io.druid.query.spec.MultipleSpecificSegmentSpec;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Arrays;
+import java.util.List;
 
 /**
  * Druid query based input format.
@@ -127,9 +125,9 @@ public static DruidQueryRecordReader getDruidQueryReader(String druidQueryType)
       if (dataSource == null || dataSource.isEmpty()) {
         throw new IOException("Druid data source cannot be empty or null");
       }
-      //@FIXME https://issues.apache.org/jira/browse/HIVE-19023 use scan instead of Select
-      druidQuery = DruidStorageHandlerUtils.createSelectStarQuery(dataSource);
-      druidQueryType = Query.SELECT;
+
+      druidQuery = DruidStorageHandlerUtils.createScanAllQuery(dataSource);
+      druidQueryType = Query.SCAN;
     } else {
       druidQueryType = conf.get(Constants.DRUID_QUERY_TYPE);
       if (druidQueryType == null) {
@@ -210,7 +208,7 @@ public static DruidQueryRecordReader getDruidQueryReader(String druidQueryType)
   private static HiveDruidSplit[] distributeScanQuery(Configuration conf, String address,
       ScanQuery query, Path dummyPath) throws IOException {
     // If it has a limit, we use it and we do not distribute the query
-    final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
+    final boolean isFetch = query.getLimit() < Long.MAX_VALUE;
     if (isFetch) {
       return new HiveDruidSplit[] { new HiveDruidSplit(
           DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidScanQueryRecordReader.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidScanQueryRecordReader.java
index 68ac88c09f..64c640f45a 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidScanQueryRecordReader.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidScanQueryRecordReader.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.druid.serde;
 
-import io.druid.query.Result;
 import io.druid.query.scan.ScanQuery;
 import io.druid.query.scan.ScanResultValue;
 
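A note on the isFetch change above: a pushed-down LIMIT now lives on the ScanQuery itself (io.druid's ScanQuery reports Long.MAX_VALUE when no limit was set), so the Hive-specific DRUID_QUERY_FETCH context entry is no longer consulted. A condensed sketch of the decision, with a hypothetical helper name:

    import io.druid.query.scan.ScanQuery;

    final class ScanSplitSketch {
      // Hypothetical mirror of the new test in distributeScanQuery: a finite
      // limit means Hive pushed a LIMIT down, so the query is served from a
      // single split instead of being fanned out one split per Druid segment.
      static boolean isFetch(ScanQuery query) {
        return query.getLimit() < Long.MAX_VALUE;
      }
    }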
diff --git druid-handler/src/test/org/apache/hadoop/hive/druid/DruidStorageHandlerUtilsTest.java druid-handler/src/test/org/apache/hadoop/hive/druid/DruidStorageHandlerUtilsTest.java
deleted file mode 100644
index d079e4f031..0000000000
--- druid-handler/src/test/org/apache/hadoop/hive/druid/DruidStorageHandlerUtilsTest.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.druid;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.io.IOException;
-
-public class DruidStorageHandlerUtilsTest {
-
-  @Test public void testCreateSelectStarQuery() throws IOException {
-    Assert.assertTrue("this should not be null",
-        DruidStorageHandlerUtils.createSelectStarQuery("dummy_ds").contains("dummy_ds"));
-  }
-}
\ No newline at end of file
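The patch deletes this test without adding a counterpart for the new method. A hypothetical equivalent (sketch only; the class name and assertions below are not from the patch) could look like:

    package org.apache.hadoop.hive.druid;

    import com.fasterxml.jackson.core.JsonProcessingException;
    import org.junit.Assert;
    import org.junit.Test;

    public class DruidStorageHandlerUtilsScanTest {

      @Test public void testCreateScanAllQuery() throws JsonProcessingException {
        String json = DruidStorageHandlerUtils.createScanAllQuery("dummy_ds");
        Assert.assertTrue("serialized query should reference the data source",
            json.contains("dummy_ds"));
        Assert.assertTrue("should serialize as a scan query", json.contains("scan"));
      }
    }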
diff --git ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
index cf8161f4cb..c3679a328e 100644
--- ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
+++ ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
@@ -740,7 +740,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 0
-0
 PREHOOK: query: EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table
 WHERE EXTRACT(MINUTE from `__time`) >= 0 LIMIT 2
 PREHOOK: type: QUERY
@@ -862,7 +861,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 31	4	31
-31	4	31
 PREHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table
 WHERE EXTRACT(WEEK from `__time`) >= 1 AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1
 PREHOOK: type: QUERY
@@ -900,7 +898,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 1
-1
 PREHOOK: query: EXPLAIN SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table
 WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1
 PREHOOK: type: QUERY
@@ -938,7 +935,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 4.0	12	12
-4.0	12	12
 PREHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table
 WHERE EXTRACT(QUARTER from `__time`) >= 4 AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1
 PREHOOK: type: QUERY
@@ -976,7 +972,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 4	4.0
-4	4.0
 PREHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) AS year_str FROM druid_table
 WHERE EXTRACT(YEAR from `__time`) >= 1969 AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1
 PREHOOK: type: QUERY
@@ -1014,7 +1009,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 1969	1969
-1969	1969
 PREHOOK: query: DROP TABLE druid_table
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@druid_table
diff --git ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
index 8c70363bfc..97d07049fe 100644
--- ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
+++ ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
@@ -741,7 +740,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table_n2
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 0
-0
 PREHOOK: query: EXPLAIN SELECT EXTRACT(MINUTE from `__time`) FROM druid_table_n2
 WHERE EXTRACT(MINUTE from `__time`) >= 0 LIMIT 2
 PREHOOK: type: QUERY
@@ -863,7 +862,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table_n2
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 31	4	31
-31	4	31
 PREHOOK: query: EXPLAIN SELECT EXTRACT(WEEK from `__time`) FROM druid_table_n2
 WHERE EXTRACT(WEEK from `__time`) >= 1 AND EXTRACT(WEEK from `__time`) DIV 4 + 1 = 1 LIMIT 1
 PREHOOK: type: QUERY
@@ -901,7 +899,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table_n2
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 1
-1
 PREHOOK: query: EXPLAIN SELECT EXTRACT(MONTH FROM `__time`) / 4 + 1, EXTRACT(MONTH FROM `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 6, 2) as month_str FROM druid_table_n2
 WHERE EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 AND EXTRACT(MONTH FROM `__time`) BETWEEN 11 AND 12 LIMIT 1
 PREHOOK: type: QUERY
@@ -939,7 +936,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table_n2
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 4.0	12	12
-4.0	12	12
 PREHOOK: query: EXPLAIN SELECT EXTRACT(QUARTER from `__time`), EXTRACT(MONTH FROM `__time`) / 4 + 1 as q_number FROM druid_table_n2
 WHERE EXTRACT(QUARTER from `__time`) >= 4 AND EXTRACT(MONTH FROM `__time`) / 4 + 1 = 4 LIMIT 1
 PREHOOK: type: QUERY
@@ -977,7 +973,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table_n2
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 4	4.0
-4	4.0
 PREHOOK: query: EXPLAIN SELECT EXTRACT(YEAR from `__time`), SUBSTRING(CAST(CAST(`__time` AS DATE) AS STRING), 1, 4) AS year_str FROM druid_table_n2
 WHERE EXTRACT(YEAR from `__time`) >= 1969 AND CAST(EXTRACT(YEAR from `__time`) as STRING) = '1969' LIMIT 1
 PREHOOK: type: QUERY
@@ -1015,7 +1010,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@druid_table_n2
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 1969	1969
-1969	1969
 PREHOOK: query: DROP TABLE druid_table_n2
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@druid_table_n2
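Reading the golden-file changes above: each removed line is a duplicate of the row kept immediately before it. That is consistent with the old Select path, where pagingSpec(1) plus DRUID_QUERY_FETCH=false caused the query to be distributed and the same page to come back from more than one split; with the Scan path, the pushed-down LIMIT makes getLimit() finite, the query runs as a single fetch split, and each qualifying row is returned once.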