From cb5954fd7e1825aa670f8419130784bf87ff9670 Mon Sep 17 00:00:00 2001 From: Naresh P R Date: Sun, 9 Jun 2019 14:06:24 +0530 Subject: [PATCH] HIVE-21837 - MapJoin is throwing exception when selected column is having completely null values Signed-off-by: Naresh P R --- .../ql/exec/vector/VectorizedBatchUtil.java | 2 + .../clientpositive/vectorized_mapjoin3.q | 12 ++++ .../llap/vectorized_mapjoin3.q.out | 68 +++++++++++++++++++ .../clientpositive/vectorized_mapjoin3.q.out | 68 +++++++++++++++++++ 4 files changed, 150 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 617cbf15a8..ec24c10845 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -619,6 +619,8 @@ public static ColumnVector makeLikeColumnVector(ColumnVector source copy[i] = makeLikeColumnVector(src.fields[i]); } return new UnionColumnVector(src.tags.length, copy); + } else if (source instanceof VoidColumnVector) { + return new VoidColumnVector(VectorizedRowBatch.DEFAULT_SIZE); } else throw new HiveException("Column vector class " + source.getClass().getName() + diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q index c216499cdb..954456d807 100644 --- a/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q +++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q @@ -7,6 +7,7 @@ set hive.auto.convert.join=true; create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc; create temporary table table_6 (int_col_0 int) stored as orc; +CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc; insert into table_19 values (418.9, 1000), @@ -17,6 +18,9 @@ insert into table_19 values insert into table_6 values (1000); +INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST'); SELECT t1.decimal0801_col FROM table_19 t1 @@ -32,6 +36,14 @@ SELECT t1.decimal0801_col FROM table_19 t1 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null; +SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')); set hive.explain.user=false; diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index e9f73448b5..e66e2bd1bb 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -14,6 +14,14 @@ POSTHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@table_6 +PREHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_27 +POSTHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_27 PREHOOK: query: insert into table_19 values (418.9, 1000), (418.9, -759), @@ -43,6 +51,21 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@table_6 POSTHOOK: Lineage: table_6.int_col_0 SCRIPT [] +PREHOOK: query: INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_27 +POSTHOOK: query: INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_27 +POSTHOOK: Lineage: table_27.f_id SCRIPT [] +POSTHOOK: Lineage: table_27.t_id SCRIPT [] +POSTHOOK: Lineage: table_27.type SCRIPT [] PREHOOK: query: SELECT t1.decimal0801_col FROM table_19 t1 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null @@ -88,6 +111,51 @@ POSTHOOK: Input: default@table_19 POSTHOOK: Input: default@table_6 #### A masked pattern was here #### 418.9 +Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@table_27 +#### A masked pattern was here #### +POSTHOOK: query: SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@table_27 +#### A masked pattern was here #### +J NULL +J NULL +I NULL +I NULL +H NULL +H NULL +G NULL +G NULL +F NULL +F NULL +E NULL +E NULL +D NULL +D NULL +C NULL +C NULL +B NULL +B NULL +A NULL +A NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT t1.decimal0801_col FROM table_19 t1 diff --git a/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out index 0c94b8ed09..2d16dd81af 100644 --- a/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out @@ -14,6 +14,14 @@ POSTHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@table_6 +PREHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_27 +POSTHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_27 PREHOOK: query: insert into table_19 values (418.9, 1000), (418.9, -759), @@ -43,6 +51,21 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@table_6 POSTHOOK: Lineage: table_6.int_col_0 SCRIPT [] +PREHOOK: query: INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_27 +POSTHOOK: query: INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_27 +POSTHOOK: Lineage: table_27.f_id SCRIPT [] +POSTHOOK: Lineage: table_27.t_id SCRIPT [] +POSTHOOK: Lineage: table_27.type SCRIPT [] PREHOOK: query: SELECT t1.decimal0801_col FROM table_19 t1 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null @@ -88,6 +111,51 @@ POSTHOOK: Input: default@table_19 POSTHOOK: Input: default@table_6 #### A masked pattern was here #### 418.9 +Warning: Map Join MAPJOIN[25][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +PREHOOK: query: SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@table_27 +#### A masked pattern was here #### +POSTHOOK: query: SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@table_27 +#### A masked pattern was here #### +A NULL +A NULL +B NULL +B NULL +C NULL +C NULL +D NULL +D NULL +E NULL +E NULL +F NULL +F NULL +G NULL +G NULL +H NULL +H NULL +I NULL +I NULL +J NULL +J NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT t1.decimal0801_col FROM table_19 t1 -- 2.18.0