diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java index be4c0d5..2558a06 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java @@ -33,6 +33,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn; import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn; import static org.apache.parquet.filter2.predicate.FilterApi.intColumn; +import static org.apache.parquet.filter2.predicate.FilterApi.or; public class LeafFilterFactory { private static final Logger LOG = LoggerFactory.getLogger(LeafFilterFactory.class); @@ -150,6 +151,17 @@ public FilterPredicate buildPredict(Operator op, Object constant, return lt(binaryColumn(columnName), Binary.fromString((String) constant)); case IS_NULL: case EQUALS: + if (constant == null) { + return eq(binaryColumn(columnName), null); + } + String value = (String) constant; + if (value.endsWith(" ")) { + String regex = "\\s+$"; + String trimmedValue = value.replaceAll(regex, ""); + return or(eq(binaryColumn(columnName), Binary.fromString(value)), + eq(binaryColumn(columnName), Binary.fromString(trimmedValue))); + } + return eq(binaryColumn(columnName), Binary.fromString(value)); case NULL_SAFE_EQUALS: return eq(binaryColumn(columnName), (constant == null) ? null : Binary.fromString((String) constant)); diff --git ql/src/test/queries/clientpositive/parquet_ppd_char.q ql/src/test/queries/clientpositive/parquet_ppd_char.q index 4230d8c..d829bcf 100644 --- ql/src/test/queries/clientpositive/parquet_ppd_char.q +++ ql/src/test/queries/clientpositive/parquet_ppd_char.q @@ -1,7 +1,6 @@ --! qt:dataset:src1 --! qt:dataset:src -set hive.parquet.timestamp.skip.conversion=true; set hive.vectorized.execution.enabled=false; SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; SET hive.optimize.ppd=true; @@ -78,4 +77,4 @@ set hive.optimize.index.filter=false; select * from newtypestbl_n3 where c between "carrot" and "carrot1"; set hive.optimize.index.filter=true; -select * from newtypestbl_n3 where c between "carrot" and "carrot1"; \ No newline at end of file +select * from newtypestbl_n3 where c between "carrot" and "carrot1";