diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index bc3b8d6..ec4e363 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7707,6 +7707,10 @@ private Operator genMapGroupByForSemijoin(QB qb, ArrayList fields, // t
             keys[i][k].getTypeInfo(), commonType)) {
           keys[i][k] = ParseUtils.createConversionCast(
               keys[i][k], (PrimitiveTypeInfo)commonType);
+        } else {
+          // When no implicit type conversion is needed, e.g., varchar(5) and varchar(10),
+          // pick the common type for all the keys, since the same key type is assumed at run time.
+          keys[i][k].setTypeInfo(commonType);
         }
       }
     }
diff --git ql/src/test/queries/clientpositive/join_on_varchar.q ql/src/test/queries/clientpositive/join_on_varchar.q
new file mode 100644
index 0000000..2cf261e
--- /dev/null
+++ ql/src/test/queries/clientpositive/join_on_varchar.q
@@ -0,0 +1,12 @@
+-- SORT_QUERY_RESULTS
+
+create table tbl1(c1 varchar(10), intcol int);
+create table tbl2(c2 varchar(30));
+insert into table tbl1 select repeat('t', 10), 11 from src limit 1;
+insert into table tbl1 select repeat('s', 10), 22 from src limit 1;
+insert into table tbl2 select concat(repeat('t', 10), 'ppp') from src limit 1;
+insert into table tbl2 select repeat('s', 10) from src limit 1;
+
+explain
+select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2;
+select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2;
diff --git ql/src/test/results/clientpositive/join_on_varchar.q.out ql/src/test/results/clientpositive/join_on_varchar.q.out
new file mode 100644
index 0000000..7d0f551
--- /dev/null
+++ ql/src/test/results/clientpositive/join_on_varchar.q.out
@@ -0,0 +1,146 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table tbl1(c1 varchar(10), intcol int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table tbl1(c1 varchar(10), intcol int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: create table tbl2(c2 varchar(30))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: create table tbl2(c2 varchar(30))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert into table tbl1 select repeat('t', 10), 11 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert into table tbl1 select repeat('t', 10), 11 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.c1 EXPRESSION []
+POSTHOOK: Lineage: tbl1.intcol SIMPLE []
+PREHOOK: query: insert into table tbl1 select repeat('s', 10), 22 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert into table tbl1 select repeat('s', 10), 22 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.c1 EXPRESSION []
+POSTHOOK: Lineage: tbl1.intcol SIMPLE []
+PREHOOK: query: insert into table tbl2 select concat(repeat('t', 10), 'ppp') from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert into table tbl2 select concat(repeat('t', 10), 'ppp') from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.c2 EXPRESSION []
+PREHOOK: query: insert into table tbl2 select repeat('s', 10) from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert into table tbl2 select repeat('s', 10) from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.c2 EXPRESSION []
+PREHOOK: query: explain
+select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-1 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        tbl2
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        tbl2
+          TableScan
+            alias: tbl2
+            Statistics: Num rows: 2 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: c2 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 c1 (type: varchar(30))
+                  1 c2 (type: varchar(30))
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl1
+            Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 c1 (type: varchar(30))
+                  1 c2 (type: varchar(30))
+                outputColumnNames: _col0, _col5
+                Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: varchar(10)), _col5 (type: varchar(30))
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: varchar(10)), _col1 (type: varchar(30))
+                    sort order: ++
+                    Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: varchar(10)), KEY.reducesinkkey1 (type: varchar(30))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+ssssssssss	ssssssssss
diff --git ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out
index b681e5f..3fafe1f 100644
--- ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out
+++ ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out
@@ -102,8 +102,8 @@ STAGE PLANS:
                     Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       keys:
-                        0 dec (type: decimal(4,2))
-                        1 dec (type: decimal(4,0))
+                        0 dec (type: decimal(6,2))
+                        1 dec (type: decimal(6,2))
             Local Work:
               Map Reduce Local Work

@@ -123,8 +123,8 @@ STAGE PLANS:
                       condition map:
                           Inner Join 0 to 1
                       keys:
-                        0 dec (type: decimal(4,2))
-                        1 dec (type: decimal(4,0))
+                        0 dec (type: decimal(6,2))
+                        1 dec (type: decimal(6,2))
                       outputColumnNames: _col0, _col4
                       input vertices:
                         1 Map 2
diff --git ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out
index d6bcf99..7c968ae 100644
--- ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out
+++ ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out
@@ -105,8 +105,8 @@ STAGE PLANS:
                       condition map:
                           Inner Join 0 to 1
                       keys:
-                        0 dec (type: decimal(4,2))
-                        1 dec (type: decimal(4,0))
+                        0 dec (type: decimal(6,2))
+                        1 dec (type: decimal(6,2))
                       outputColumnNames: _col0, _col4
                       input vertices:
                         1 Map 2
@@ -131,10 +131,11 @@ STAGE PLANS:
                     predicate: dec is not null (type: boolean)
                     Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: dec (type: decimal(4,0))
+                      key expressions: dec (type: decimal(6,2))
                       sort order: +
-                      Map-reduce partition columns: dec (type: decimal(4,0))
+                      Map-reduce partition columns: dec (type: decimal(6,2))
                       Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: dec (type: decimal(4,0))

         Stage: Stage-0
           Fetch Operator
diff --git ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out
index 702d450..1484d6f 100644
--- ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out
+++ ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out
@@ -240,11 +240,11 @@ STAGE PLANS:
                     predicate: c2 is not null (type: boolean)
                     Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: c2 (type: char(10))
+                      key expressions: c2 (type: char(20))
                       sort order: +
-                      Map-reduce partition columns: c2 (type: char(10))
+                      Map-reduce partition columns: c2 (type: char(20))
                       Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: c1 (type: int)
+                      value expressions: c1 (type: int), c2 (type: char(10))
             Execution mode: vectorized
         Map 2
             Map Operator Tree:
@@ -258,7 +258,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 c2 (type: char(10))
+                  0 c2 (type: char(20))
                   1 c2 (type: char(20))
                 outputColumnNames: _col0, _col1, _col5, _col6
                 input vertices:
diff --git ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out
index 207c9d4..2698aec 100644
--- ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out
+++ ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out
@@ -241,7 +241,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 c2 (type: varchar(10))
+                  0 c2 (type: varchar(20))
                   1 c2 (type: varchar(20))
                 outputColumnNames: _col0, _col1, _col5, _col6
                 input vertices:
diff --git ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
index 2ed5797..824944a 100644
--- ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
+++ ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
@@ -238,7 +238,7 @@ STAGE PLANS:
               Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
-                  0 c2 (type: char(10))
+                  0 c2 (type: char(20))
                   1 c2 (type: char(20))

   Stage: Stage-2
@@ -254,7 +254,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 c2 (type: char(10))
+                  0 c2 (type: char(20))
                   1 c2 (type: char(20))
                 outputColumnNames: _col0, _col1, _col5, _col6
                 Statistics: Num rows: 2 Data size: 215 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
index 5427c35..2994bff 100644
--- ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
@@ -236,7 +236,7 @@ STAGE PLANS:
               Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
-                  0 c2 (type: varchar(10))
+                  0 c2 (type: varchar(20))
                   1 c2 (type: varchar(20))

   Stage: Stage-2
@@ -252,7 +252,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 c2 (type: varchar(10))
+                  0 c2 (type: varchar(20))
                   1 c2 (type: varchar(20))
                 outputColumnNames: _col0, _col1, _col5, _col6
                 Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
index 8dffe63..71ec0f5 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
@@ -768,12 +768,14 @@ public static TypeInfo getTypeInfoFromTypeString(String typeString) {
    * @return
    */
   public static boolean isConversionRequiredForComparison(TypeInfo typeA, TypeInfo typeB) {
-    if (typeA == typeB) {
+    if (typeA.equals(typeB)) {
       return false;
     }
-    if (TypeInfoUtils.doPrimitiveCategoriesMatch(typeA, typeB)) {
+
+    if (TypeInfoUtils.doPrimitiveCategoriesMatch(typeA, typeB)) {
       return false;
     }
+
     return true;
   }

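
Note on the TypeInfoUtils hunk above: the switch from == to equals() matters because two TypeInfo objects can describe the same qualified type (for example varchar(10)) without being the same instance, and reference equality would then report a spurious conversion. Below is a minimal, standalone Java sketch of that pitfall; the VarcharType class and TypeEqualitySketch are hypothetical stand-ins for illustration only, not Hive's TypeInfo hierarchy.

// Standalone sketch (hypothetical VarcharType, not Hive code): why value equality,
// not reference equality, should decide whether a comparison needs a conversion cast.
public class TypeEqualitySketch {
  static final class VarcharType {
    final int length;
    VarcharType(int length) { this.length = length; }
    @Override public boolean equals(Object o) {
      return o instanceof VarcharType && ((VarcharType) o).length == length;
    }
    @Override public int hashCode() { return length; }
  }

  // Mirrors the intent of isConversionRequiredForComparison: equal types need no cast.
  static boolean isConversionRequiredForComparison(VarcharType a, VarcharType b) {
    return !a.equals(b); // "a == b" would wrongly demand a cast for equal but distinct instances
  }

  public static void main(String[] args) {
    VarcharType x = new VarcharType(10);
    VarcharType y = new VarcharType(10);
    System.out.println(x == y);                                   // false: different instances
    System.out.println(isConversionRequiredForComparison(x, y));  // false: same qualified type
  }
}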