From 53ea036450007b123cba552b00e2f56072958c0c Mon Sep 17 00:00:00 2001 From: Nishant Date: Mon, 29 Jan 2018 21:17:21 +0530 Subject: [PATCH] [HIVE-18569] Fix handling of decimals while indexing data --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 3 +++ .../org/apache/hadoop/hive/druid/io/DruidOutputFormat.java | 12 +++++++++++- ql/src/test/queries/clientpositive/druidmini_mv.q | 4 ++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 13067dfc06..459527d994 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2109,6 +2109,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Wait time in ms default to 30 seconds." ), HIVE_DRUID_BITMAP_FACTORY_TYPE("hive.druid.bitmap.type", "roaring", new PatternSet("roaring", "concise"), "Coding algorithm use to encode the bitmaps"), + HIVE_DRUID_APPROX_RESULT("hive.druid.approx.result", false, + "Whether to allow approximate results from druid. \n" + + "When set to true decimals will be stored as double and druid is allowed to return approximate results for decimal columns."), // For HBase storage handler HIVE_HBASE_WAL_ENABLED("hive.hbase.wal.enabled", true, "Whether writes to HBase should be forced to the write-ahead log. 
\n" + diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java index 0977329b69..8c25d62860 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java @@ -129,6 +129,7 @@ } ArrayList columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + final boolean approximationAllowed = HiveConf.getBoolVar(jc, HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT); // Default, all columns that are not metrics or timestamp, are treated as dimensions final List dimensions = new ArrayList<>(); ImmutableList.Builder aggregatorFactoryBuilder = ImmutableList.builder(); @@ -145,9 +146,18 @@ break; case FLOAT: case DOUBLE: - case DECIMAL: af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i)); break; + case DECIMAL: + if (approximationAllowed) { + af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i)); + } else { + throw new UnsupportedOperationException( + String.format("Druid does not support decimal column type." 
+ + "Either cast column [%s] to double or Enable Approximate Result for Druid by setting property [%s] to true", + columnNames.get(i), HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT.varname)); + } + break; case TIMESTAMP: // Granularity column String tColumnName = columnNames.get(i); diff --git a/ql/src/test/queries/clientpositive/druidmini_mv.q b/ql/src/test/queries/clientpositive/druidmini_mv.q index e059357602..9f8500f16f 100644 --- a/ql/src/test/queries/clientpositive/druidmini_mv.q +++ b/ql/src/test/queries/clientpositive/druidmini_mv.q @@ -18,7 +18,7 @@ CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, b, c +SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, b, cast(c as double) FROM cmv_basetable WHERE a = 2; @@ -30,7 +30,7 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS cmv_mat_view2 ENABLE REWRITE STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, c +SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, cast(c as double) FROM cmv_basetable WHERE a = 3; -- 2.11.0 (Apple Git-81)