diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java index f95ebcd..3bd01f2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java @@ -18,7 +18,7 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator; - +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.parquet.filter2.predicate.FilterApi; import org.apache.parquet.filter2.predicate.FilterPredicate; import org.apache.parquet.io.api.Binary; @@ -167,9 +167,11 @@ public FilterPredicate buildPredict(Operator op, Object constant, * supported yet. * @param type FilterPredicateType * @return + * @throws HiveException if the data type is not supported, so that the caller can skip filtering */ - public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type, - Type parquetType){ + public FilterPredicateLeafBuilder getLeafFilterBuilderByType( + PredicateLeaf.Type type, + Type parquetType) throws HiveException { switch (type){ case LONG: if (parquetType.asPrimitiveType().getPrimitiveTypeName() == @@ -193,8 +195,9 @@ public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type case DECIMAL: case TIMESTAMP: default: - LOG.debug("Conversion to Parquet FilterPredicate not supported for " + type); - return null; + String msg = "Conversion to Parquet FilterPredicate not supported for " + type; + LOG.debug(msg); + throw new HiveException(msg); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java index 786a260..47777f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java @@ -39,7 +39,8 @@ /** * Translate the search argument to the filter predicate parquet uses. It includes * only the columns from the passed schema. - * @return translate the sarg into a filter predicate + * @return a filter predicate translated from the search argument; null is returned + * if the conversion fails. */ public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) { Set columns = null; @@ -50,13 +51,17 @@ public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType } } - return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema); + try { + return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema); + } catch (Exception e) { + return null; + } } private static FilterPredicate translate(ExpressionTree root, List leaves, Set columns, - MessageType schema) { + MessageType schema) throws Exception { FilterPredicate p = null; switch (root.getOperator()) { case OR: @@ -113,15 +118,13 @@ private static FilterPredicate translate(ExpressionTree root, } private static FilterPredicate buildFilterPredicateFromPredicateLeaf - (PredicateLeaf leaf, Type parquetType) { + (PredicateLeaf leaf, Type parquetType) throws Exception { LeafFilterFactory leafFilterFactory = new LeafFilterFactory(); FilterPredicateLeafBuilder builder; try { builder = leafFilterFactory .getLeafFilterBuilderByType(leaf.getType(), parquetType); - if (builder == null) { - return null; - } + if (isMultiLiteralsOperator(leaf.getOperator())) { return builder.buildPredicate(leaf.getOperator(), leaf.getLiteralList(), @@ -134,7 +137,7 @@ private static FilterPredicate translate(ExpressionTree root, } } catch (Exception e) { LOG.error("fail to build predicate filter leaf with errors" + e, e); - return null; + throw e; } } diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_multifiles.q 
b/ql/src/test/queries/clientpositive/parquet_ppd_multifiles.q new file mode 100644 index 0000000..6483684 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_ppd_multifiles.q @@ -0,0 +1,13 @@ +CREATE TABLE parquet_ppd_multifiles ( + name string, + dec decimal(5,0) +) stored as parquet; + +insert into table parquet_ppd_multifiles values('Jim', 3); +insert into table parquet_ppd_multifiles values('Tom', 5); + +set hive.optimize.index.filter=false; +select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5); + +set hive.optimize.index.filter=true; +select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5); diff --git a/ql/src/test/results/clientpositive/parquet_ppd_multifiles.q.out b/ql/src/test/results/clientpositive/parquet_ppd_multifiles.q.out new file mode 100644 index 0000000..d7688f8 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_ppd_multifiles.q.out @@ -0,0 +1,50 @@ +PREHOOK: query: CREATE TABLE parquet_ppd_multifiles ( + name string, + dec decimal(5,0) +) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_ppd_multifiles +POSTHOOK: query: CREATE TABLE parquet_ppd_multifiles ( + name string, + dec decimal(5,0) +) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_ppd_multifiles +PREHOOK: query: insert into table parquet_ppd_multifiles values('Jim', 3) +PREHOOK: type: QUERY +PREHOOK: Output: default@parquet_ppd_multifiles +POSTHOOK: query: insert into table parquet_ppd_multifiles values('Jim', 3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@parquet_ppd_multifiles +POSTHOOK: Lineage: parquet_ppd_multifiles.dec EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: parquet_ppd_multifiles.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] 
+PREHOOK: query: insert into table parquet_ppd_multifiles values('Tom', 5) +PREHOOK: type: QUERY +PREHOOK: Output: default@parquet_ppd_multifiles +POSTHOOK: query: insert into table parquet_ppd_multifiles values('Tom', 5) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@parquet_ppd_multifiles +POSTHOOK: Lineage: parquet_ppd_multifiles.dec EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: parquet_ppd_multifiles.name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5) +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ppd_multifiles +#### A masked pattern was here #### +POSTHOOK: query: select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ppd_multifiles +#### A masked pattern was here #### +Jim 3 +Tom 5 +PREHOOK: query: select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5) +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ppd_multifiles +#### A masked pattern was here #### +POSTHOOK: query: select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ppd_multifiles +#### A masked pattern was here #### +Jim 3 +Tom 5