From 629e0d3b962452784b2dfea48a8a92d805fb2666 Mon Sep 17 00:00:00 2001
From: Nishant
Date: Fri, 18 May 2018 23:29:55 +0530
Subject: [PATCH] [HIVE-19604] Fix parsing of Boolean Value from Numeric Values

---
 .../apache/hadoop/hive/druid/serde/DruidSerDe.java |  6 ++-
 .../hadoop/hive/druid/serde/TestDruidSerDe.java    | 23 ++++++-----
 .../queries/clientpositive/druidmini_expressions.q |  6 +++
 .../druid/druidmini_expressions.q.out              | 47 ++++++++++++++++++++++
 4 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
index 5f7657975a..3b06770be2 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
@@ -461,7 +461,11 @@ public Object deserialize(Writable writable) throws SerDeException {
         output.add(new Text(value.toString()));
         break;
       case BOOLEAN:
-        output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
+        if (value instanceof Number) {
+          output.add(new BooleanWritable(((Number) value).intValue() != 0));
+        } else {
+          output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
+        }
         break;
       default:
         throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory());
diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
index e45de0f93f..922c1db9fb 100644
--- a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
+++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
@@ -74,6 +74,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
@@ -462,7 +463,7 @@
       + "  \"offset\" : 0, "
       + "  \"event\" : { "
       + "   \"timestamp\" : \"2013-01-01T00:00:00.000Z\", "
-      + "   \"robot\" : \"1\", "
+      + "   \"robot\" : 1, "
       + "   \"namespace\" : \"article\", "
       + "   \"anonymous\" : \"0\", "
       + "   \"unpatrolled\" : \"0\", "
@@ -481,7 +482,7 @@
       + "  \"offset\" : 1, "
       + "  \"event\" : { "
       + "   \"timestamp\" : \"2013-01-01T00:00:00.000Z\", "
-      + "   \"robot\" : \"0\", "
+      + "   \"robot\" : 0, "
       + "   \"namespace\" : \"article\", "
       + "   \"anonymous\" : \"0\", "
       + "   \"unpatrolled\" : \"0\", "
@@ -500,7 +501,7 @@
       + "  \"offset\" : 2, "
       + "  \"event\" : { "
       + "   \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
-      + "   \"robot\" : \"0\", "
+      + "   \"robot\" : 0, "
       + "   \"namespace\" : \"article\", "
       + "   \"anonymous\" : \"0\", "
      + "   \"unpatrolled\" : \"0\", "
@@ -519,7 +520,7 @@
       + "  \"offset\" : 3, "
       + "  \"event\" : { "
       + "   \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
-      + "   \"robot\" : \"0\", "
+      + "   \"robot\" : 0, "
       + "   \"namespace\" : \"article\", "
       + "   \"anonymous\" : \"0\", "
       + "   \"unpatrolled\" : \"0\", "
@@ -538,7 +539,7 @@
       + "  \"offset\" : 4, "
       + "  \"event\" : { "
       + "   \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
-      + "   \"robot\" : \"0\", "
+      + "   \"robot\" : 0, "
       + "   \"namespace\" : \"article\", "
       + "   \"anonymous\" : \"0\", "
       + "   \"unpatrolled\" : \"0\", "
@@ -556,30 +557,30 @@
   // Select query results as records (types defined by metastore)
  private static final String SELECT_COLUMN_NAMES = "__time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted";
-  private static final String SELECT_COLUMN_TYPES = "timestamp with local time zone,string,string,string,string,string,string,string,string,double,double,float,float,float";
+  private static final String SELECT_COLUMN_TYPES = "timestamp with local time zone,boolean,string,string,string,string,string,string,string,double,double,float,float,float";
   private static final Object[][] SELECT_QUERY_RESULTS_RECORDS = new Object[][] {
-      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new Text("1"),
+      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new BooleanWritable(true),
           new Text("article"), new Text("0"), new Text("0"), new Text("11._korpus_(NOVJ)"), new Text("sl"), new Text("0"), new Text("EmausBot"),
           new DoubleWritable(1.0d), new DoubleWritable(39.0d), new FloatWritable(39.0F),
           new FloatWritable(39.0F), new FloatWritable(0.0F) },
-      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new Text("0"),
+      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
           new Text("article"), new Text("0"), new Text("0"), new Text("112_U.S._580"), new Text("en"), new Text("1"), new Text("MZMcBride"),
           new DoubleWritable(1.0d), new DoubleWritable(70.0d), new FloatWritable(70.0F),
           new FloatWritable(70.0F), new FloatWritable(0.0F) },
-      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
           new Text("article"), new Text("0"), new Text("0"), new Text("113_U.S._243"), new Text("en"), new Text("1"), new Text("MZMcBride"),
           new DoubleWritable(1.0d), new DoubleWritable(77.0d), new FloatWritable(77.0F),
           new FloatWritable(77.0F), new FloatWritable(0.0F) },
-      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
           new Text("article"), new Text("0"), new Text("0"), new Text("113_U.S._73"), new Text("en"), new Text("1"), new Text("MZMcBride"),
           new DoubleWritable(1.0d), new DoubleWritable(70.0d), new FloatWritable(70.0F),
           new FloatWritable(70.0F), new FloatWritable(0.0F) },
-      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+      new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
           new Text("article"), new Text("0"), new Text("0"), new Text("113_U.S._756"), new Text("en"), new Text("1"), new Text("MZMcBride"),
           new DoubleWritable(1.0d), new DoubleWritable(68.0d), new FloatWritable(68.0F),
diff --git a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q
index 882d7afacd..f5025fddaa 100644
--- a/ql/src/test/queries/clientpositive/druidmini_expressions.q
+++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q
@@ -47,7 +47,13 @@ EXPLAIN SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ct
 
 EXPLAIN SELECT cstring1 || '_'|| cstring2, substring(cstring2, 2, 3) as concat , upper(cstring2), lower(cstring1), SUM(cdouble) as s FROM druid_table WHERE cstring1 IS NOT NULL AND cstring2 IS NOT NULL AND cstring2 like 'Y%' GROUP BY cstring1 || '_'|| cstring2, substring(cstring2, 2, 3), upper(cstring2), lower(cstring1) ORDER BY concat DESC LIMIT 10;
 
+-- Boolean Values
+SELECT cboolean2, count(*) from druid_table GROUP BY cboolean2;
+-- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319
+-- It should get fixed once we upgrade calcite
+SELECT ctinyint > 2, count(*) from druid_table GROUP BY ctinyint > 2;
+EXPLAIN SELECT ctinyint > 2, count(*) from druid_table GROUP BY ctinyint > 2;
 
 DROP TABLE druid_table;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
index a6fa042de7..1812b1a3bf 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
@@ -257,6 +257,53 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 ListSink
 
+PREHOOK: query: -- Boolean Values
+SELECT cboolean2, count(*) from druid_table GROUP BY cboolean2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: -- Boolean Values
+SELECT cboolean2, count(*) from druid_table GROUP BY cboolean2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL	8
+false	3140
+true	2957
+PREHOOK: query: SELECT ctinyint > 2, count(*) from druid_table GROUP BY ctinyint > 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT ctinyint > 2, count(*) from druid_table GROUP BY ctinyint > 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+false	2653
+false	3452
+PREHOOK: query: EXPLAIN SELECT ctinyint > 2, count(*) from druid_table GROUP BY ctinyint > 2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT ctinyint > 2, count(*) from druid_table GROUP BY ctinyint > 2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table
+          properties:
+            druid.fieldNames vc,$f1
+            druid.fieldTypes boolean,bigint
+            druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"(\"ctinyint\" > 2)","outputType":"FLOAT"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"count","name":"$f1"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+            druid.query.type groupBy
+          Select Operator
+            expressions: vc (type: boolean), $f1 (type: bigint)
+            outputColumnNames: _col0, _col1
+            ListSink
+
 PREHOOK: query: DROP TABLE druid_table
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@druid_table
-- 
2.15.1 (Apple Git-101)
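
Note (not part of the patch): Druid may return a boolean-typed column as a JSON number (0 or 1) rather than the strings "true"/"false". Because Boolean.valueOf("1") is false in Java, the old one-line BOOLEAN case turned every numeric true into false once the value was stringified; that is the behavior HIVE-19604 fixes. Below is a minimal standalone sketch of the coercion rule the patched BOOLEAN branch applies. The class BooleanCoercionSketch and its helper method are made up for illustration only, and it assumes hadoop-common (for BooleanWritable) is on the classpath.

import org.apache.hadoop.io.BooleanWritable;

public class BooleanCoercionSketch {

  // Mirrors the logic the patch adds to the BOOLEAN case of DruidSerDe#deserialize:
  // a numeric value is true when non-zero; anything else falls back to
  // Boolean.valueOf on the string form ("true"/"false").
  static BooleanWritable toBooleanWritable(Object value) {
    if (value instanceof Number) {
      return new BooleanWritable(((Number) value).intValue() != 0);
    }
    return new BooleanWritable(Boolean.valueOf(value.toString()));
  }

  public static void main(String[] args) {
    System.out.println(toBooleanWritable(1));       // true  -- numeric 1, the case this patch fixes
    System.out.println(toBooleanWritable(0));       // false -- numeric 0
    System.out.println(toBooleanWritable("true"));  // true  -- string form still handled
    System.out.println(toBooleanWritable("1"));     // false -- Boolean.valueOf("1") is false,
                                                    //          i.e. the pre-patch result for numeric values
  }
}

The string fallback is kept, so dimensions that Druid still serves as "true"/"false" strings deserialize exactly as before; only values arriving as numbers change behavior.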