diff --git druid-handler/pom.xml druid-handler/pom.xml
index 0db542e..e4fa8fd 100644
--- druid-handler/pom.xml
+++ druid-handler/pom.xml
@@ -84,6 +84,11 @@
+    <dependency>
+      <groupId>org.apache.calcite</groupId>
+      <artifactId>calcite-druid</artifactId>
+      <version>${calcite.version}</version>
+    </dependency>
     <dependency>
       <groupId>io.druid</groupId>
       <artifactId>druid-processing</artifactId>
       <version>${druid.version}</version>
diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/HiveDruidQueryBasedInputFormat.java druid-handler/src/java/org/apache/hadoop/hive/druid/HiveDruidQueryBasedInputFormat.java
index a18e590..cc4a8ef 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/HiveDruidQueryBasedInputFormat.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/HiveDruidQueryBasedInputFormat.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 
 import org.joda.time.Interval;
+import org.joda.time.chrono.ISOChronology;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -210,7 +211,7 @@ private static String createSelectStarQuery(String address, String dataSource) t
     final long numRows = metadataList.get(0).getNumRows();
 
-    query = query.withPagingSpec(PagingSpec.newSpec(selectThreshold));
+    query = query.withPagingSpec(PagingSpec.newSpec(Integer.MAX_VALUE));
     if (numRows <= selectThreshold) {
       // We are not going to split it
       return new HiveDruidSplit[] { new HiveDruidSplit(address,
@@ -221,8 +222,8 @@ private static String createSelectStarQuery(String address, String dataSource) t
     // a Time Boundary query. Then, we use the information to split the query
     // following the Select threshold configuration property
     final List<Interval> intervals = new ArrayList<>();
-    if (query.getIntervals().size() == 1 &&
-        query.getIntervals().get(0).equals(DruidTable.DEFAULT_INTERVAL)) {
+    if (query.getIntervals().size() == 1 && query.getIntervals().get(0).withChronology(
+        ISOChronology.getInstanceUTC()).equals(DruidTable.DEFAULT_INTERVAL)) {
       // Default max and min, we should execute a time boundary query to get a
       // more precise range
       TimeBoundaryQueryBuilder timeBuilder = new Druids.TimeBoundaryQueryBuilder();
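Note on the new interval check above: Joda-Time's `Interval.equals` compares the chronology in addition to the endpoints, so a query interval that spans exactly `DruidTable.DEFAULT_INTERVAL` but carries a non-UTC chronology would never match, and the time-boundary shortcut would be skipped. A minimal, self-contained sketch of that pitfall (the interval values are made up for illustration):

```java
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.chrono.ISOChronology;

public class IntervalChronologyDemo {
  public static void main(String[] args) {
    // Identical instants, different chronologies.
    Interval utc = new Interval(0L, 1000L, ISOChronology.getInstanceUTC());
    Interval zoned = new Interval(0L, 1000L,
        ISOChronology.getInstance(DateTimeZone.forID("Europe/Paris")));
    // equals() also compares chronologies, so this prints false.
    System.out.println(zoned.equals(utc));
    // Normalizing to the UTC ISO chronology, as the patch does, makes it true.
    System.out.println(zoned.withChronology(ISOChronology.getInstanceUTC()).equals(utc));
  }
}
```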
diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java
index 49e096b..f97f820 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidGroupByQueryRecordReader.java
@@ -127,8 +127,13 @@ public DruidWritable getCurrentValue() throws IOException, InterruptedException
     for (int i=0; i < query.getDimensions().size(); i++) {
       DimensionSpec ds = query.getDimensions().get(i);
       List<String> dims = current.getDimension(ds.getDimension());
-      int pos = dims.size() - indexes[i] - 1;
-      value.getValue().put(ds.getOutputName(), dims.get(pos));
+      if (dims.size() == 0) {
+        // NULL value for dimension
+        value.getValue().put(ds.getOutputName(), null);
+      } else {
+        int pos = dims.size() - indexes[i] - 1;
+        value.getValue().put(ds.getOutputName(), dims.get(pos));
+      }
     }
     int counter = 0;
     // 3) The aggregation columns
@@ -161,8 +166,13 @@ public boolean next(NullWritable key, DruidWritable value) {
     for (int i=0; i < query.getDimensions().size(); i++) {
       DimensionSpec ds = query.getDimensions().get(i);
       List<String> dims = current.getDimension(ds.getDimension());
-      int pos = dims.size() - indexes[i] - 1;
-      value.getValue().put(ds.getOutputName(), dims.get(pos));
+      if (dims.size() == 0) {
+        // NULL value for dimension
+        value.getValue().put(ds.getOutputName(), null);
+      } else {
+        int pos = dims.size() - indexes[i] - 1;
+        value.getValue().put(ds.getOutputName(), dims.get(pos));
+      }
     }
     int counter = 0;
     // 3) The aggregation columns
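The guard added to both `getCurrentValue()` and `next()` covers rows that carry no value for a grouping dimension: with an empty list, the old arithmetic `dims.size() - indexes[i] - 1` evaluates to -1 and `dims.get(-1)` throws `IndexOutOfBoundsException`, so the reader now emits NULL instead. A sketch of the logic in isolation (a plain list stands in for Druid's `Row.getDimension()` result; names are illustrative):

```java
import java.util.Collections;
import java.util.List;

public class NullDimensionDemo {
  // index plays the role of the reader's indexes[i] bookkeeping for
  // (possibly multi-valued) dimensions.
  static Object dimensionValue(List<String> dims, int index) {
    if (dims.size() == 0) {
      return null; // dimension absent in this row: emit NULL
    }
    int pos = dims.size() - index - 1;
    return dims.get(pos);
  }

  public static void main(String[] args) {
    // Before the fix, the empty case computed pos = -1 and threw.
    System.out.println(dimensionValue(Collections.<String>emptyList(), 0)); // null
    System.out.println(dimensionValue(Collections.singletonList("en"), 0)); // en
  }
}
```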
diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSelectQueryRecordReader.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSelectQueryRecordReader.java
index fccf7c4..8b40d4d 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSelectQueryRecordReader.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSelectQueryRecordReader.java
@@ -62,7 +62,7 @@ public boolean nextKeyValue() throws IOException {
     if (results.hasNext()) {
       current = results.next();
       values = current.getValue().getEvents().iterator();
-      return true;
+      return nextKeyValue();
     }
     return false;
   }
diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java
index 0b77a9b..22599c3 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidTopNQueryRecordReader.java
@@ -62,7 +62,7 @@ public boolean nextKeyValue() {
     if (results.hasNext()) {
       current = results.next();
       values = current.getValue().getValue().iterator();
-      return true;
+      return nextKeyValue();
     }
     return false;
   }
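In both readers, returning `true` immediately after repositioning `values` on the next result object was wrong when that object's event list turned out to be empty: the caller would then see a phantom row with stale or missing data. Recursing into `nextKeyValue()` re-checks the fresh iterator and keeps skipping empty batches until a real row or the end of the results. A self-contained sketch of the pattern (types and fields are simplified stand-ins for the readers' internals):

```java
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

public class SkipEmptyBatchDemo {
  private final Iterator<List<String>> results;
  private Iterator<String> values = Collections.emptyIterator();
  String current;

  SkipEmptyBatchDemo(Iterator<List<String>> results) {
    this.results = results;
  }

  boolean nextKeyValue() {
    if (values.hasNext()) {
      current = values.next();
      return true;
    }
    if (results.hasNext()) {
      values = results.next().iterator();
      // Was `return true`, which reported a row even for an empty batch.
      return nextKeyValue();
    }
    return false;
  }

  public static void main(String[] args) {
    SkipEmptyBatchDemo reader = new SkipEmptyBatchDemo(
        Arrays.asList(Collections.<String>emptyList(), Arrays.asList("row1")).iterator());
    while (reader.nextKeyValue()) {
      System.out.println(reader.current); // prints only "row1"
    }
  }
}
```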
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index 7b9ee84..e36e1bd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -82,7 +82,7 @@ static ASTNode table(RelNode scan) {
           dq.getQueryString()) + "\""));
       propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
           .add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_TYPE + "\"")
-          .add(HiveParser.StringLiteral, "\"" + dq.getQueryType() + "\""));
+          .add(HiveParser.StringLiteral, "\"" + dq.getQueryType().getQueryName() + "\""));
     }
     if (hts.isInsideView()) {
       // We need to carry the insideView information from calcite into the ast.
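`dq.getQueryType()` returns an enum, so the old string concatenation stored the enum constant name (`SELECT`, `TOP_N`, `GROUP_BY`, `TIMESERIES`) in the `druid.query.type` table property, whereas the Druid JSON and the record-reader dispatch use the camel-case `queryType` strings; `getQueryName()` returns the latter, which is what the q.out updates below reflect. A sketch of the mismatch, using a local stand-in for Calcite's `org.apache.calcite.adapter.druid.QueryType` enum (constant names taken from the q.out diffs):

```java
// Stand-in for Calcite's QueryType enum.
enum QueryType {
  SELECT("select"), TOP_N("topN"), GROUP_BY("groupBy"), TIMESERIES("timeseries");

  private final String queryName;

  QueryType(String queryName) {
    this.queryName = queryName;
  }

  String getQueryName() {
    return queryName;
  }
}

public class QueryTypeDemo {
  public static void main(String[] args) {
    // String concatenation uses toString(), i.e. the constant name ...
    System.out.println("" + QueryType.TOP_N);           // TOP_N
    // ... while Druid expects the queryType string.
    System.out.println(QueryType.TOP_N.getQueryName()); // topN
  }
}
```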
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator @@ -280,7 +280,7 @@ STAGE PLANS: columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -305,7 +305,7 @@ STAGE PLANS: columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -404,7 +404,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator @@ -419,7 +419,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Reduce Output Operator @@ -446,7 +446,7 @@ STAGE PLANS: columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select #### A masked pattern was here #### name default.druid_table_1 numFiles 0 @@ -471,7 +471,7 @@ STAGE PLANS: columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float druid.datasource wikipedia druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"dimensions":["robot"],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select #### A masked pattern was here #### name default.druid_table_1 numFiles 0 diff --git ql/src/test/results/clientpositive/druid_intervals.q.out ql/src/test/results/clientpositive/druid_intervals.q.out index 0cb373b..b9a8313 100644 --- ql/src/test/results/clientpositive/druid_intervals.q.out +++ ql/src/test/results/clientpositive/druid_intervals.q.out @@ -82,7 +82,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp) @@ -114,7 +114,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/2012-03-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp) @@ -146,7 +146,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2012-03-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp) @@ -180,7 +180,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2011-01-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp) @@ -212,7 +212,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json 
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2011-01-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp) @@ -246,7 +246,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2011-01-01T00:00:00.001Z","2012-01-01T00:00:00.000Z/2013-01-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp) @@ -280,7 +280,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2012-01-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp) @@ -312,7 +312,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z","2011-01-01T00:00:00.000Z/2011-01-01T00:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp) @@ -342,7 +342,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["2010-01-01T00:00:00.000Z/2010-01-01T00:00:00.001Z","2011-01-01T00:00:00.000Z/2011-01-01T00:00:00.001Z"],"filter":{"type":"selector","dimension":"robot","value":"user1"},"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp), 'user1' (type: string) @@ -373,7 +373,7 @@ STAGE PLANS: filterExpr: ((__time) IN ('2010-01-01 00:00:00', '2011-01-01 00:00:00') or (robot = 'user1')) (type: boolean) properties: druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((__time) IN ('2010-01-01 00:00:00', '2011-01-01 00:00:00') or (robot = 'user1')) (type: 
diff --git ql/src/test/results/clientpositive/druid_timeseries.q.out ql/src/test/results/clientpositive/druid_timeseries.q.out
index 3708fba..1ed5573 100644
--- ql/src/test/results/clientpositive/druid_timeseries.q.out
+++ ql/src/test/results/clientpositive/druid_timeseries.q.out
@@ -82,7 +82,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"longMax","name":"$f0","fieldName":"added"},{"type":"doubleSum","name":"$f1","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type TIMESERIES
+              druid.query.type timeseries
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: $f0 (type: bigint), $f1 (type: float)
@@ -114,7 +114,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"NONE","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type TIMESERIES
+              druid.query.type timeseries
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float)
@@ -146,7 +146,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"YEAR","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type TIMESERIES
+              druid.query.type timeseries
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float)
@@ -178,7 +178,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"QUARTER","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type TIMESERIES
+              druid.query.type timeseries
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float)
@@ -210,7 +210,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"MONTH","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type TIMESERIES
+              druid.query.type timeseries
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float)
@@ -242,7 +242,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"WEEK","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type TIMESERIES
+              druid.query.type timeseries
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float)
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"WEEK","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type TIMESERIES + druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) @@ -274,7 +274,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"DAY","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type TIMESERIES + druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) @@ -306,7 +306,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"HOUR","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type TIMESERIES + druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) @@ -338,7 +338,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"MINUTE","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type TIMESERIES + druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) @@ -370,7 +370,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"SECOND","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type TIMESERIES + druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp), $f1 (type: bigint), $f2 (type: float) @@ -404,7 +404,7 @@ STAGE PLANS: alias: druid_table_1 properties: druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"HOUR","filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type TIMESERIES + druid.query.type timeseries Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp), $f1 (type: bigint), $f2 
diff --git ql/src/test/results/clientpositive/druid_topn.q.out ql/src/test/results/clientpositive/druid_topn.q.out
index 51f1fb5..9a7ed6c 100644
--- ql/src/test/results/clientpositive/druid_topn.q.out
+++ ql/src/test/results/clientpositive/druid_topn.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"all","dimension":"robot","metric":"$f1","aggregations":[{"type":"longMax","name":"$f1","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100}
-              druid.query.type TOP_N
+              druid.query.type topN
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: robot (type: string), $f1 (type: bigint), $f2 (type: float)
@@ -124,7 +124,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"NONE","dimension":"robot","metric":"$f3","aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":100}
-              druid.query.type TOP_N
+              druid.query.type topN
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
@@ -160,7 +160,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"topN","dataSource":"wikipedia","granularity":"YEAR","dimension":"robot","metric":"$f3","aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"threshold":10}
-              druid.query.type TOP_N
+              druid.query.type topN
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
@@ -196,7 +196,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f3","direction":"ascending"}]},"aggregations":[{"type":"longMax","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type GROUP_BY
+              druid.query.type groupBy
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: robot (type: string), __time (type: timestamp), $f2 (type: bigint), $f3 (type: float)
@@ -232,7 +232,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot","namespace"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f4","direction":"descending"},{"dimension":"$f3","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type GROUP_BY
+              druid.query.type groupBy
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
@@ -268,7 +268,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"MONTH","dimensions":["robot","namespace"],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending"},{"dimension":"$f3","direction":"descending"}]},"aggregations":[{"type":"longMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type GROUP_BY
+              druid.query.type groupBy
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: robot (type: string), __time (type: timestamp), $f3 (type: bigint), $f4 (type: float)
@@ -306,7 +306,7 @@ STAGE PLANS:
             alias: druid_table_1
             properties:
               druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"YEAR","dimensions":[],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f2","direction":"ascending"}]},"filter":{"type":"selector","dimension":"robot","value":"1"},"aggregations":[{"type":"longMax","name":"$f1_0","fieldName":"added"},{"type":"doubleSum","name":"$f2","fieldName":"variation"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
-              druid.query.type GROUP_BY
+              druid.query.type groupBy
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: '1' (type: string), __time (type: timestamp), $f1_0 (type: bigint), $f2 (type: float)
@@ -350,7 +350,7 @@ STAGE PLANS:
             filterExpr: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean)
             properties:
               druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}}
-              druid.query.type SELECT
+              druid.query.type select
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
               predicate: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean)
{"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384},"context":{"druid.query.fetch":false}} - druid.query.type SELECT + druid.query.type select Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: floor_hour(__time) BETWEEN 2010-01-01 00:00:00.0 AND 2014-01-01 00:00:00.0 (type: boolean) diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java index 227e8a9..3b8295a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java @@ -89,6 +89,8 @@ public String toString() { protected List fields; + protected List originalColumnNames; + protected StandardStructObjectInspector() { super(); } @@ -114,10 +116,12 @@ protected void init(List structFieldNames, List structFieldComments) { fields = new ArrayList(structFieldNames.size()); + originalColumnNames = new ArrayList(structFieldNames.size()); for (int i = 0; i < structFieldNames.size(); i++) { fields.add(new MyField(i, structFieldNames.get(i), structFieldObjectInspectors.get(i), structFieldComments == null ? null : structFieldComments.get(i))); + originalColumnNames.add(structFieldNames.get(i)); } } @@ -127,9 +131,11 @@ protected StandardStructObjectInspector(List fields) { protected void init(List fields) { this.fields = new ArrayList(fields.size()); + this.originalColumnNames = new ArrayList(fields.size()); for (int i = 0; i < fields.size(); i++) { this.fields.add(new MyField(i, fields.get(i).getFieldName(), fields .get(i).getFieldObjectInspector())); + this.originalColumnNames.add(fields.get(i).getFieldName()); } } @@ -209,6 +215,10 @@ public Object getStructFieldData(Object data, StructField fieldRef) { return list; } + public List getOriginalColumnNames() { + return originalColumnNames; + } + // ///////////////////////////// // SettableStructObjectInspector @Override