diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 69a18cd..ffc134f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -677,7 +677,9 @@ public static TypeInfo getTypeInfoForPrimitiveCategory(
   }
 
   /**
-   * Find a common class for union-all operator
+   * Find a common type for the union-all operator. Only types within the same
+   * type group resolve to a common type; no implicit conversion is attempted
+   * across different type groups.
    */
   public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) {
     if (a.equals(b)) {
@@ -696,26 +698,21 @@ public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) {
     PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
     PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
 
-    // handle string types properly
-    if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) {
-      return getTypeInfoForPrimitiveCategory(
-          (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING);
+    if (pgA != pgB) {
+      return null;
     }
 
-    if (TypeInfoUtils.implicitConvertible(a, b)) {
-      return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcB);
-    }
-    if (TypeInfoUtils.implicitConvertible(b, a)) {
-      return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA);
-    }
-    for (PrimitiveCategory t : TypeInfoUtils.numericTypeList) {
-      if (TypeInfoUtils.implicitConvertible(pcA, t)
-          && TypeInfoUtils.implicitConvertible(pcB, t)) {
-        return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t);
-      }
+    switch (pgA) {
+      case STRING_GROUP:
+        return getTypeInfoForPrimitiveCategory(
+            (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, PrimitiveCategory.STRING);
+      case NUMERIC_GROUP:
+        return TypeInfoUtils.implicitConvertible(a, b) ? b : a;
+      case DATE_GROUP:
+        return TypeInfoFactory.timestampTypeInfo;
+      default:
+        return null;
     }
-
-    return null;
   }
 
   /**
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
index 59ecd1e..d2d5a1b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
@@ -327,14 +327,12 @@ private void unionAll(TypeInfo a, TypeInfo b, TypeInfo result) {
   }
 
   public void testCommonClassUnionAll() {
+    unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.intTypeInfo,
+        TypeInfoFactory.doubleTypeInfo);
     unionAll(TypeInfoFactory.intTypeInfo, TypeInfoFactory.decimalTypeInfo,
         TypeInfoFactory.decimalTypeInfo);
-    unionAll(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo,
-        TypeInfoFactory.stringTypeInfo);
     unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.decimalTypeInfo,
         TypeInfoFactory.doubleTypeInfo);
-    unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
-        TypeInfoFactory.stringTypeInfo);
 
     unionAll(varchar5, varchar10, varchar10);
     unionAll(varchar10, varchar5, varchar10);
@@ -346,8 +344,13 @@ public void testCommonClassUnionAll() {
     unionAll(char10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
     unionAll(TypeInfoFactory.stringTypeInfo, char10, TypeInfoFactory.stringTypeInfo);
 
-    // common class for char/varchar is string?
-    comparison(char10, varchar5, TypeInfoFactory.stringTypeInfo);
+    unionAll(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.dateTypeInfo,
+        TypeInfoFactory.timestampTypeInfo);
+
+    // Invalid cases
+    unionAll(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo, null);
+    unionAll(TypeInfoFactory.doubleTypeInfo, varchar10, null);
+
   }
 
   public void testGetTypeInfoForPrimitiveCategory() {
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q
index 67fdd23..f0a00fb 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q
@@ -134,14 +134,14 @@ INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
 ) subq1;
 
 INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) as cnt FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
 ) subq1;
 
 SELECT * FROM outputTbl1;
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q
index 39b9420..38384dc 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q
@@ -100,12 +100,12 @@ SELECT * FROM outputTbl3;
 
 -- group by followed by another group by
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
 (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
 group by key + key;
 
 INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
 (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
 group by key + key;
@@ -135,14 +135,14 @@ INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
 ) subq1;
 
 INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) as cnt FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
 ) subq1;
 
 SELECT * FROM outputTbl1;
diff --git a/ql/src/test/queries/clientpositive/union32.q b/ql/src/test/queries/clientpositive/union32.q
index f47f0af..f414588 100644
--- a/ql/src/test/queries/clientpositive/union32.q
+++ b/ql/src/test/queries/clientpositive/union32.q
@@ -52,13 +52,13 @@ SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a
 
 -- Test union with join on the left selecting multiple columns
 EXPLAIN SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 ;
 
 SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 ;
@@ -68,11 +68,11 @@ EXPLAIN SELECT * FROM
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 ;
 
 SELECT * FROM
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 ;
diff --git a/ql/src/test/queries/clientpositive/unionDistinct_1.q b/ql/src/test/queries/clientpositive/unionDistinct_1.q
index fd7f075..9ade188 100644
--- a/ql/src/test/queries/clientpositive/unionDistinct_1.q
+++ b/ql/src/test/queries/clientpositive/unionDistinct_1.q
@@ -910,13 +910,13 @@ SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a
 
 -- Test union with join on the left selecting multiple columns
 EXPLAIN SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION DISTINCT
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 ;
 
 SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION DISTINCT
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 ;
@@ -926,13 +926,13 @@ EXPLAIN SELECT * FROM
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION DISTINCT
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 ;
 
 SELECT * FROM
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION DISTINCT
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 ;
 
 -- union33.q
@@ -949,7 +949,7 @@ SELECT key, value FROM (
   SELECT key, value FROM src WHERE key = 0
 UNION DISTINCT
-  SELECT key, COUNT(*) AS value FROM src
+  SELECT key, cast(COUNT(*) as string) AS value FROM src
   GROUP BY key
 )a;
@@ -958,7 +958,7 @@ SELECT key, value FROM (
   SELECT key, value FROM src WHERE key = 0
 UNION DISTINCT
-  SELECT key, COUNT(*) AS value FROM src
+  SELECT key, cast(COUNT(*) as string) AS value FROM src
   GROUP BY key
 )a;
@@ -966,7 +966,7 @@ SELECT COUNT(*) FROM test_src;
 
 EXPLAIN INSERT OVERWRITE TABLE test_src
 SELECT key, value FROM (
-  SELECT key, COUNT(*) AS value FROM src
+  SELECT key, cast(COUNT(*) as string) AS value FROM src
   GROUP BY key
 UNION DISTINCT
   SELECT key, value FROM src
@@ -975,7 +975,7 @@ UNION DISTINCT
 
 INSERT OVERWRITE TABLE test_src
 SELECT key, value FROM (
-  SELECT key, COUNT(*) AS value FROM src
+  SELECT key, cast(COUNT(*) as string) AS value FROM src
   GROUP BY key
 UNION DISTINCT
   SELECT key, value FROM src
diff --git
a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index 81fe0d9..e70f912 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -2791,7 +2791,7 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by @@ -2800,7 +2800,7 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -2902,23 +2902,27 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Select Operator + expressions: UDFToString(_col0) (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -2937,47 +2941,43 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked 
pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false TableScan GatherStats: false Union @@ -3027,7 +3027,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types double,bigint + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3036,7 +3036,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types double,bigint + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3310,7 +3310,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -3319,7 +3319,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index 5cf0ea2..fc52984 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -2330,7 +2330,7 @@ PREHOOK: query: -- it should not matter what follows the group by -- group 
by followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY @@ -2340,7 +2340,7 @@ POSTHOOK: query: -- it should not matter what follows the group by -- group by followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key POSTHOOK: type: QUERY @@ -2514,7 +2514,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + expressions: UDFToInteger(UDFToString(_col0)) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2581,14 +2581,14 @@ STAGE PLANS: #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key POSTHOOK: type: QUERY @@ -3055,7 +3055,7 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by @@ -3064,7 +3064,7 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -3232,23 +3232,27 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Select Operator + expressions: UDFToString(_col0) (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3267,47 +3271,43 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false TableScan GatherStats: false Union @@ -3357,7 +3357,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types double,bigint + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3366,7 +3366,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
              properties:
                columns _col0,_col1
-                columns.types double,bigint
+                columns.types string,bigint
                escape.delim \
                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -3640,7 +3640,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) as cnt FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
 ) subq1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
@@ -3649,7 +3649,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) as cnt FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
 ) subq1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
diff --git a/ql/src/test/results/clientpositive/union32.q.out b/ql/src/test/results/clientpositive/union32.q.out
index a3fefa8..3f55aaa 100644
--- a/ql/src/test/results/clientpositive/union32.q.out
+++ b/ql/src/test/results/clientpositive/union32.q.out
@@ -454,14 +454,14 @@ POSTHOOK: Input: default@t2
 PREHOOK: query: -- Test union with join on the left selecting multiple columns
 EXPLAIN SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Test union with join on the left selecting multiple columns
 EXPLAIN SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 POSTHOOK: type: QUERY
@@ -514,7 +514,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(UDFToDouble(_col1)) (type: string)
+                  expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(CAST( _col1 AS CHAR(20))) (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -561,7 +561,7 @@ STAGE PLANS:
           ListSink
 
 PREHOOK: query: SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 PREHOOK: type: QUERY
@@ -569,7 +569,7 @@ PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 POSTHOOK: type: QUERY
@@ -579,48 +579,48 @@ POSTHOOK: Input: default@t2
 0.0	0
 0.0	0
 0.0	0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
 2.0	2
-2.0	2.0
+2.0	2
+4.0	4
 4.0	4
-4.0	4.0
 5.0	5
 5.0	5
 5.0	5
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+8.0	8
 8.0	8
-8.0	8.0
 9.0	9
-9.0	9.0
+9.0	9
 PREHOOK: query: -- Test union with join on the right selecting multiple columns
 EXPLAIN SELECT * FROM
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Test union with join on the right selecting multiple columns
 EXPLAIN SELECT * FROM
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
@@ -671,7 +671,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToDouble(_col1) (type: double)
+                  expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(CAST( _col1 AS CHAR(20))) (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -688,7 +688,7 @@ STAGE PLANS:
             alias: t2
             Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
+              expressions: UDFToDouble(key) (type: double), key (type: string)
              outputColumnNames: _col0, _col1
               Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
               Union
@@ -720,7 +720,7 @@ STAGE PLANS:
 PREHOOK: query: SELECT * FROM
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
@@ -728,40 +728,40 @@ PREHOOK: Input: default@t2
 POSTHOOK: query: SELECT * FROM
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 #### A masked pattern was here ####
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-2.0	2.0
-2.0	2.0
-4.0	4.0
-4.0	4.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-8.0	8.0
-8.0	8.0
-9.0	9.0
-9.0	9.0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0 0 +0.0 0 +0.0 0 +0.0 0 +2.0 2 +2.0 2 +4.0 4 +4.0 4 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +5.0 5 +8.0 8 +8.0 8 +9.0 9 +9.0 9 diff --git a/ql/src/test/results/clientpositive/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/unionDistinct_1.q.out index 0330133..db85250 100644 --- a/ql/src/test/results/clientpositive/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/unionDistinct_1.q.out @@ -14190,14 +14190,14 @@ POSTHOOK: Input: default@t2 PREHOOK: query: -- Test union with join on the left selecting multiple columns EXPLAIN SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a PREHOOK: type: QUERY POSTHOOK: query: -- Test union with join on the left selecting multiple columns EXPLAIN SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a POSTHOOK: type: QUERY @@ -14271,7 +14271,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(UDFToDouble(_col1)) (type: string) + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Union @@ -14309,7 +14309,7 @@ STAGE PLANS: ListSink PREHOOK: query: SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a PREHOOK: type: QUERY @@ -14317,7 +14317,7 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a POSTHOOK: type: QUERY @@ -14325,30 +14325,24 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### 0.0 0 -0.0 0.0 2.0 2 -2.0 2.0 4.0 4 -4.0 4.0 5.0 5 -5.0 5.0 8.0 8 -8.0 8.0 9.0 9 -9.0 9.0 PREHOOK: query: -- Test union with join on the right selecting multiple columns EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a PREHOOK: type: QUERY POSTHOOK: query: -- Test union with join on the right selecting multiple columns EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT 
CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-6 is a root stage @@ -14386,20 +14380,20 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) + expressions: UDFToDouble(key) (type: double), key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: double), _col1 (type: double) + keys: _col0 (type: double), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) + Map-reduce partition columns: _col0 (type: double), _col1 (type: string) Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE TableScan alias: t2 @@ -14420,26 +14414,26 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToDouble(_col1) (type: double) + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(CAST( _col1 AS varchar(20))) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: double), _col1 (type: double) + keys: _col0 (type: double), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) + Map-reduce partition columns: _col0 (type: double), _col1 (type: string) Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: double), KEY._col1 (type: double) + keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -14460,7 +14454,7 @@ STAGE PLANS: PREHOOK: query: SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 @@ -14468,17 +14462,17 @@ PREHOOK: Input: default@t2 POSTHOOK: query: SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS 
STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -0.0 0.0 -2.0 2.0 -4.0 4.0 -5.0 5.0 -8.0 8.0 -9.0 9.0 +0.0 0 +2.0 2 +4.0 4 +5.0 5 +8.0 8 +9.0 9 PREHOOK: query: -- union33.q -- SORT_BEFORE_DIFF @@ -14508,7 +14502,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a PREHOOK: type: QUERY @@ -14517,7 +14511,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a POSTHOOK: type: QUERY @@ -14639,7 +14633,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a PREHOOK: type: QUERY @@ -14650,7 +14644,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a POSTHOOK: type: QUERY @@ -14669,7 +14663,7 @@ POSTHOOK: Input: default@test_src 310 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src @@ -14678,7 +14672,7 @@ UNION DISTINCT PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src @@ -14800,7 +14794,7 @@ STAGE PLANS: PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src @@ -14811,7 +14805,7 @@ PREHOOK: Input: default@src PREHOOK: Output: default@test_src POSTHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java index 8f7b799..14349fa 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java @@ -874,6 +874,9 @@ public static void registerNumericType(PrimitiveCategory primitiveCategory, int numericTypes.put(primitiveCategory, level); } + /** + * Test if it's implicitly convertible for data comparison. + */ public static boolean implicitConvertible(PrimitiveCategory from, PrimitiveCategory to) { if (from == to) { return true;
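
Reviewer note (not part of the patch): below is a minimal sketch of the resolution rules introduced above, assuming the patched FunctionRegistry is on the classpath. The demo class and its println driver are hypothetical; the getCommonClassForUnionAll and TypeInfoFactory calls are the ones exercised by TestFunctionRegistry#testCommonClassUnionAll.

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Hypothetical driver, illustrative only.
public class UnionAllCommonTypeDemo {
  public static void main(String[] args) {
    // Same numeric group: resolves to the wider type (int UNION ALL decimal -> decimal).
    System.out.println(FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.decimalTypeInfo));
    // Date group: always widens to timestamp (date UNION ALL timestamp -> timestamp).
    System.out.println(FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo));
    // Cross-group operands no longer resolve: the result is null, so queries must
    // cast explicitly -- hence the cast(... as string) edits in the .q files above.
    System.out.println(FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.stringTypeInfo, TypeInfoFactory.doubleTypeInfo));
  }
}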