ReduceSinkDeDuplication: skip null column lists when building the column mapping.

The Java hunk wraps both loops over desc.getCols() (one for partitionCols,
one for reduceKeyCols) in a null check, so an expression whose getCols()
returns null no longer reaches the for-each. The new reducesink_dedup.q test
runs nested "distribute by 1 sort by 1" clauses; presumably the constant
expression is what produces the null column list (confirm against
ExprNodeDesc.getCols()). reducesink_dedup.q.out is the matching golden output.

Reconstruction note: this patch was recovered from a copy whose line breaks
and indentation had been collapsed. Line structure matches the hunk headers
(16 -> 20, 21 and 61 lines). Leading whitespace in the Java hunk (8-space base)
and in the CREATE TABLE column lists (4 spaces) is re-created by convention,
and the <String> type argument on "List cols" was restored; verify against the
target file, or apply with "git apply --ignore-whitespace" / "patch -l".

diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
index 77c50b0..11ed929 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
@@ -412,16 +412,20 @@ private boolean backTrackColumnNames(
         if(partitionCols != null) {
           for (ExprNodeDesc desc : partitionCols) {
             List<String> cols = desc.getCols();
-            for(String col : cols) {
-              columnMapping.put(col, col);
+            if ( cols != null ) {
+              for(String col : cols) {
+                columnMapping.put(col, col);
+              }
             }
           }
         }
         if(reduceKeyCols != null) {
           for (ExprNodeDesc desc : reduceKeyCols) {
             List<String> cols = desc.getCols();
-            for(String col : cols) {
-              columnMapping.put(col, col);
+            if ( cols != null ) {
+              for(String col : cols) {
+                columnMapping.put(col, col);
+              }
             }
           }
         }
diff --git ql/src/test/queries/clientpositive/reducesink_dedup.q ql/src/test/queries/clientpositive/reducesink_dedup.q
new file mode 100644
index 0000000..5c90de3
--- /dev/null
+++ ql/src/test/queries/clientpositive/reducesink_dedup.q
@@ -0,0 +1,21 @@
+DROP TABLE part;
+
+-- data setup
+CREATE TABLE part(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+);
+
+LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part;
+
+select p_name
+from (select p_name from part distribute by 1 sort by 1) p
+distribute by 1 sort by 1
+limit 10;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/reducesink_dedup.q.out ql/src/test/results/clientpositive/reducesink_dedup.q.out
new file mode 100644
index 0000000..533a70d
--- /dev/null
+++ ql/src/test/results/clientpositive/reducesink_dedup.q.out
@@ -0,0 +1,61 @@
+PREHOOK: query: DROP TABLE part
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE part
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- data setup
+CREATE TABLE part(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- data setup
+CREATE TABLE part(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@part
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part
+PREHOOK: type: LOAD
+PREHOOK: Output: default@part
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@part
+PREHOOK: query: select p_name
+from (select p_name from part distribute by 1 sort by 1) p
+distribute by 1 sort by 1
+limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_name
+from (select p_name from part distribute by 1 sort by 1) p
+distribute by 1 sort by 1
+limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+almond antique burnished rose metallic
+almond antique burnished rose metallic
+almond antique chartreuse lavender yellow
+almond antique salmon chartreuse burlywood
+almond aquamarine burnished black steel
+almond aquamarine pink moccasin thistle
+almond antique violet chocolate turquoise
+almond antique violet turquoise frosted
+almond aquamarine midnight light salmon
+almond aquamarine rose maroon antique