diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java index 77c50b0..11ed929 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java @@ -412,16 +412,20 @@ private boolean backTrackColumnNames( if(partitionCols != null) { for (ExprNodeDesc desc : partitionCols) { List cols = desc.getCols(); - for(String col : cols) { - columnMapping.put(col, col); + if ( cols != null ) { + for(String col : cols) { + columnMapping.put(col, col); + } } } } if(reduceKeyCols != null) { for (ExprNodeDesc desc : reduceKeyCols) { List cols = desc.getCols(); - for(String col : cols) { - columnMapping.put(col, col); + if ( cols != null ) { + for(String col : cols) { + columnMapping.put(col, col); + } } } } diff --git ql/src/test/queries/clientpositive/reducesink_dedup.q ql/src/test/queries/clientpositive/reducesink_dedup.q new file mode 100644 index 0000000..d5aa251 --- /dev/null +++ ql/src/test/queries/clientpositive/reducesink_dedup.q @@ -0,0 +1,20 @@ +DROP TABLE part; + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + + +select p_name +from (select p_name from part distribute by 1 sort by 1) p +distribute by 1 sort by 1 +; \ No newline at end of file diff --git ql/src/test/results/clientpositive/reducesink_dedup.q.out ql/src/test/results/clientpositive/reducesink_dedup.q.out new file mode 100644 index 0000000..32dfdd4 --- /dev/null +++ ql/src/test/results/clientpositive/reducesink_dedup.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: select p_name +from (select p_name from part distribute by 1 sort by 1) p +distribute by 1 sort by 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name +from (select p_name from part distribute by 1 sort by 1) p +distribute by 1 sort by 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here ####