From 4db9b8de8c825a3ab5451693f9fe49a8eae4bc85 Mon Sep 17 00:00:00 2001 From: Nishant Date: Fri, 1 Jun 2018 19:44:17 +0530 Subject: [PATCH] [HIVE-19762] Fix druid properties being overwritten from serdeInfo --- .../test/queries/clientpositive/druidmini_joins.q | 60 ++++++ .../clientpositive/druid/druidmini_joins.q.out | 230 +++++++++++++++++++++ .../hive/metastore/utils/MetaStoreUtils.java | 5 +- 3 files changed, 294 insertions(+), 1 deletion(-) create mode 100644 ql/src/test/queries/clientpositive/druidmini_joins.q create mode 100644 ql/src/test/results/clientpositive/druid/druidmini_joins.q.out diff --git a/ql/src/test/queries/clientpositive/druidmini_joins.q b/ql/src/test/queries/clientpositive/druidmini_joins.q new file mode 100644 index 0000000000..720127ed3f --- /dev/null +++ b/ql/src/test/queries/clientpositive/druidmini_joins.q @@ -0,0 +1,60 @@ +SET hive.vectorized.execution.enabled=false; +SET hive.explain.user=false; + +--SET hive.execution.mode=llap; + +DROP TABLE druid_table_with_nulls; + +CREATE TABLE druid_table_with_nulls +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR") +AS +SELECT cast(current_timestamp() AS timestamp with local time zone) AS `__time`, + cast(username AS string) AS username, + cast(double1 AS double) AS double1, + cast(int1 AS int) AS int1 +FROM TABLE ( + VALUES + ('alfred', 10.30, 2), + ('bob', 3.14, null), + ('bonnie', null, 3), + ('calvin', null, null), + ('charlie', 9.8, 1), + ('charlie', 15.8, 1)) as q (username, double1, int1); + +EXPLAIN SELECT +username AS `username`, +SUM(double1) AS `sum_double1` +FROM +druid_table_with_nulls `tbl1` + JOIN ( + SELECT + username AS `username`, + SUM(double1) AS `sum_double2` + FROM druid_table_with_nulls + GROUP BY `username` + ORDER BY `sum_double2` + DESC LIMIT 10 + ) + `tbl2` + ON (`tbl1`.`username` = `tbl2`.`username`) +GROUP BY `tbl1`.`username`; + + +SELECT +username AS `username`, +SUM(double1) AS `sum_double1` +FROM +druid_table_with_nulls `tbl1` + JOIN ( + SELECT + username AS `username`, + SUM(double1) AS `sum_double2` + FROM druid_table_with_nulls + GROUP BY `username` + ORDER BY `sum_double2` + DESC LIMIT 10 + ) + `tbl2` + ON (`tbl1`.`username` = `tbl2`.`username`) +GROUP BY `tbl1`.`username`; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/druid/druidmini_joins.q.out b/ql/src/test/results/clientpositive/druid/druidmini_joins.q.out new file mode 100644 index 0000000000..6ff87fb758 --- /dev/null +++ b/ql/src/test/results/clientpositive/druid/druidmini_joins.q.out @@ -0,0 +1,230 @@ +PREHOOK: query: DROP TABLE druid_table_with_nulls +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE druid_table_with_nulls +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE druid_table_with_nulls +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR") +AS +SELECT cast(current_timestamp() AS timestamp with local time zone) AS `__time`, + cast(username AS string) AS username, + cast(double1 AS double) AS double1, + cast(int1 AS int) AS int1 +FROM TABLE ( + VALUES + ('alfred', 10.30, 2), + ('bob', 3.14, null), + ('bonnie', null, 3), + ('calvin', null, null), + ('charlie', 9.8, 1), + ('charlie', 15.8, 1)) as q (username, double1, int1) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table_with_nulls +POSTHOOK: query: CREATE TABLE druid_table_with_nulls +STORED BY 
'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR") +AS +SELECT cast(current_timestamp() AS timestamp with local time zone) AS `__time`, + cast(username AS string) AS username, + cast(double1 AS double) AS double1, + cast(int1 AS int) AS int1 +FROM TABLE ( + VALUES + ('alfred', 10.30, 2), + ('bob', 3.14, null), + ('bonnie', null, 3), + ('calvin', null, null), + ('charlie', 9.8, 1), + ('charlie', 15.8, 1)) as q (username, double1, int1) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_table_with_nulls +POSTHOOK: Lineage: druid_table_with_nulls.__time SIMPLE [] +POSTHOOK: Lineage: druid_table_with_nulls.double1 SCRIPT [] +POSTHOOK: Lineage: druid_table_with_nulls.int1 SCRIPT [] +POSTHOOK: Lineage: druid_table_with_nulls.username SCRIPT [] +PREHOOK: query: EXPLAIN SELECT +username AS `username`, +SUM(double1) AS `sum_double1` +FROM +druid_table_with_nulls `tbl1` + JOIN ( + SELECT + username AS `username`, + SUM(double1) AS `sum_double2` + FROM druid_table_with_nulls + GROUP BY `username` + ORDER BY `sum_double2` + DESC LIMIT 10 + ) + `tbl2` + ON (`tbl1`.`username` = `tbl2`.`username`) +GROUP BY `tbl1`.`username` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT +username AS `username`, +SUM(double1) AS `sum_double1` +FROM +druid_table_with_nulls `tbl1` + JOIN ( + SELECT + username AS `username`, + SUM(double1) AS `sum_double2` + FROM druid_table_with_nulls + GROUP BY `username` + ORDER BY `sum_double2` + DESC LIMIT 10 + ) + `tbl2` + ON (`tbl1`.`username` = `tbl2`.`username`) +GROUP BY `tbl1`.`username` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tbl1 + properties: + druid.fieldNames username,$f1 + druid.fieldTypes string,double + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_with_nulls","granularity":"all","dimensions":[{"type":"default","dimension":"username","outputName":"username","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f1","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"double1"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: username (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: tbl1 + properties: + druid.fieldNames username,double1 + druid.fieldTypes string,double + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table_with_nulls","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"username","value":null}},"columns":["username","double1"],"resultFormat":"compactedList"} + druid.query.type scan + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: username (type: string) + sort order: + + Map-reduce partition columns: username (type: string) + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + value expressions: double1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 username (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 633 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 633 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT +username AS `username`, +SUM(double1) AS `sum_double1` +FROM +druid_table_with_nulls `tbl1` + JOIN ( + SELECT + username AS `username`, + SUM(double1) AS `sum_double2` + FROM druid_table_with_nulls + GROUP BY `username` + ORDER BY `sum_double2` + DESC LIMIT 10 + ) + `tbl2` + ON (`tbl1`.`username` = `tbl2`.`username`) +GROUP BY `tbl1`.`username` +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_with_nulls +#### A masked pattern was here #### +POSTHOOK: query: SELECT +username AS `username`, +SUM(double1) AS `sum_double1` +FROM +druid_table_with_nulls `tbl1` + JOIN ( + SELECT + username AS `username`, + SUM(double1) AS `sum_double2` + FROM druid_table_with_nulls + GROUP BY `username` + ORDER BY `sum_double2` + DESC LIMIT 10 + ) + `tbl2` + ON (`tbl1`.`username` = `tbl2`.`username`) +GROUP BY `tbl1`.`username` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_with_nulls +#### A masked pattern was here #### +alfred 10.300000190734863 +bob 3.140000104904175 +bonnie 0.0 +charlie 25.600000381469727 +calvin 0.0 diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 742b6bf76b..9b36d09eb9 100644 --- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -1282,7 +1282,10 @@ public static Properties getPartSchemaFromTableSchema(
     for (Map.Entry<String, String> param : sd.getSerdeInfo().getParameters().entrySet()) {
       String key = param.getKey();
       if (schema.get(key) != null &&
-          (key.equals(cols) || key.equals(colTypes) || key.equals(parts))) {
+          (key.equals(cols) || key.equals(colTypes) || key.equals(parts) ||
+              // skip Druid properties which are used in DruidSerDe, since they are also updated
+              // after SerDeInfo properties are copied.
+              key.startsWith("druid."))) {
         continue;
       }
       schema.put(key, (param.getValue() != null) ? param.getValue() : StringUtils.EMPTY);
-- 
2.15.1 (Apple Git-101)
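
Note for reviewers: getPartSchemaFromTableSchema copies the SerDeInfo parameters into the schema Properties last, so any druid.* entry carried in the SerDeInfo (druid.fieldNames, druid.fieldTypes, druid.query.json, druid.query.type in the plan above) clobbered the up-to-date values already present in the schema. Below is a minimal, self-contained sketch of the patched copy loop, using only java.util; the class name and property values are illustrative, and the cols/colTypes/parts checks of the real method are omitted for brevity.

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

// Standalone sketch of the copy loop patched in MetaStoreUtils.getPartSchemaFromTableSchema.
// The schema already holds the planner's fresh Druid query; the SerDeInfo parameters may
// carry a stale copy, and before this patch the loop overwrote the former with the latter.
public class DruidPropertySkipSketch {
  public static void main(String[] args) {
    Properties schema = new Properties();
    schema.put("druid.query.json", "{\"queryType\":\"scan\"}"); // fresh value (illustrative)

    Map<String, String> serdeParams = new HashMap<>();
    serdeParams.put("druid.query.json", "{\"queryType\":\"groupBy\"}"); // stale copy (illustrative)
    serdeParams.put("serialization.format", "1");

    for (Map.Entry<String, String> param : serdeParams.entrySet()) {
      String key = param.getKey();
      // The patched condition: leave any druid.* value already in `schema` alone.
      if (schema.get(key) != null && key.startsWith("druid.")) {
        continue;
      }
      schema.put(key, (param.getValue() != null) ? param.getValue() : "");
    }

    // Prints the fresh scan query, not the stale groupBy copy.
    System.out.println(schema.getProperty("druid.query.json"));
  }
}

Matching on the "druid." prefix rather than enumerating the four keys also protects any Druid property introduced later, at the cost of skipping every druid.* key that already has a value in the schema.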