diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
index be9fb10893..d7887840d2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
@@ -197,7 +197,7 @@ protected void setTimeZoneConversion(Configuration configuration, Path finalPath
     FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     if (p != null) {
-      // Filter may have sensitive information. Do not send to debug.
-      LOG.debug("PARQUET predicate push down generated.");
+      // Filter may contain sensitive literals; log it at DEBUG level only.
+      LOG.debug("PARQUET predicate push down generated. Predicates = [" + p + "]");
       ParquetInputFormat.setFilterPredicate(conf, p);
       return FilterCompat.get(p);
     } else {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
index 47777f8c07..b7a6bc5eb1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
@@ -66,14 +66,17 @@ private static FilterPredicate translate(ExpressionTree root,
     switch (root.getOperator()) {
       case OR:
         for(ExpressionTree child: root.getChildren()) {
+          FilterPredicate childPredicate = translate(child, leaves, columns, schema);
+          // A child that cannot be translated invalidates the whole OR; dropping
+          // just that branch could wrongly filter out rows it would have matched.
+          if (childPredicate == null) {
+            return null;
+          }
+
           if (p == null) {
-            p = translate(child, leaves, columns, schema);
+            p = childPredicate;
           } else {
-            FilterPredicate right = translate(child, leaves, columns, schema);
-            // constant means no filter, ignore it when it is null
-            if(right != null){
-              p = FilterApi.or(p, right);
-            }
+            p = FilterApi.or(p, childPredicate);
           }
         }
         return p;
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
index bd1f5e0420..8e7acd2f80 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
@@ -33,19 +33,63 @@ public void testFilterColumnsThatDoNoExistOnSchema() {
     MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required binary stinger; }");
     SearchArgument sarg = SearchArgumentFactory.newBuilder()
-        .startNot()
+        .startNot()
+        .startOr()
+        .isNull("a", PredicateLeaf.Type.LONG)
+        .between("y", PredicateLeaf.Type.LONG, 10L, 20L) // Column will be removed from filter
+        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L) // Column will be removed from filter
+        .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
+        .end()
+        .end()
+        .build();
+
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+
+    String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))";
+    assertEquals(expected, p.toString());
+  }
+
+  @Test
+  public void testFilterColumnsThatDoNoExistOnSchemaHighOrder1() {
+    MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required int32 b; }");
+    SearchArgument sarg = SearchArgumentFactory.newBuilder()
         .startOr()
-        .isNull("a", PredicateLeaf.Type.LONG)
-        .between("y", PredicateLeaf.Type.LONG, 10L, 20L) // Column will be removed from filter
-        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L) // Column will be removed from filter
-        .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
+        .startAnd()
+        .equals("a", PredicateLeaf.Type.LONG, 1L)
+        .equals("none", PredicateLeaf.Type.LONG, 1L)
+        .end()
+        .startAnd()
+        .equals("a", PredicateLeaf.Type.LONG, 999L)
+        .equals("none", PredicateLeaf.Type.LONG, 999L)
         .end()
         .end()
         .build();

     FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);

-    String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))";
+    String expected = "or(eq(a, 1), eq(a, 999))";
+    assertEquals(expected, p.toString());
+  }
+
+  @Test
+  public void testFilterColumnsThatDoNoExistOnSchemaHighOrder2() {
+    MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required int32 b; }");
+    SearchArgument sarg = SearchArgumentFactory.newBuilder()
+        .startAnd()
+        .startOr()
+        .equals("a", PredicateLeaf.Type.LONG, 1L)
+        .equals("b", PredicateLeaf.Type.LONG, 1L)
+        .end()
+        .startOr()
+        .equals("a", PredicateLeaf.Type.LONG, 999L)
+        .equals("none", PredicateLeaf.Type.LONG, 999L)
+        .end()
+        .end()
+        .build();
+
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+
+    String expected = "or(eq(a, 1), eq(b, 1))";
     assertEquals(expected, p.toString());
   }
diff --git ql/src/test/queries/clientpositive/parquet_predicate_pushdown_2.q ql/src/test/queries/clientpositive/parquet_predicate_pushdown_2.q
new file mode 100644
index 0000000000..1b63a429e4
--- /dev/null
+++ ql/src/test/queries/clientpositive/parquet_predicate_pushdown_2.q
@@ -0,0 +1,7 @@
+SET hive.optimize.ppd=true;
+SET hive.optimize.index.filter=true;
+
+create table test_parq(a int, b int) partitioned by (p int) stored as parquet;
+insert overwrite table test_parq partition (p=1) values (1, 1);
+select * from test_parq where a=1 and p=1;
+select * from test_parq where (a=1 and p=1) or (a=999 and p=999);
diff --git ql/src/test/results/clientpositive/parquet_predicate_pushdown_2.q.out ql/src/test/results/clientpositive/parquet_predicate_pushdown_2.q.out
new file mode 100644
index 0000000000..6cdd0a8fb2
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_predicate_pushdown_2.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: create table test_parq(a int, b int) partitioned by (p int) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_parq
+POSTHOOK: query: create table test_parq(a int, b int) partitioned by (p int) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_parq
+PREHOOK: query: insert overwrite table test_parq partition (p=1) values (1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_parq@p=1
+POSTHOOK: query: insert overwrite table test_parq partition (p=1) values (1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_parq@p=1
+POSTHOOK: Lineage: test_parq PARTITION(p=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_parq PARTITION(p=1).b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select * from test_parq where a=1 and p=1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parq
+PREHOOK: Input: default@test_parq@p=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parq where a=1 and p=1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parq
+POSTHOOK: Input: default@test_parq@p=1
+#### A masked pattern was here ####
+1	1	1
+PREHOOK: query: select * from test_parq where (a=1 and p=1) or (a=999 and p=999)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parq
+PREHOOK: Input: default@test_parq@p=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parq where (a=1 and p=1) or (a=999 and p=999)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parq
+POSTHOOK: Input: default@test_parq@p=1
+#### A masked pattern was here ####
+1	1	1
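
Reviewer note on the converter change above. A leaf predicate whose column is absent from the Parquet file schema (for example the partition column p in the q-file) translates to null. The patched code treats null asymmetrically: a null child of an AND is simply dropped, since removing a conjunct only lets more rows through, while a null child of an OR now discards the whole OR, since dropping a disjunct could reject rows that only the missing branch matched. That asymmetry is what select * from test_parq where (a=1 and p=1) or (a=999 and p=999) exercises: the SearchArgument builder normalizes toward CNF, producing clauses such as (p=1 or a=999), which the old code collapsed to eq(a, 999) and thereby filtered out the only row. The sketch below is a standalone illustration of that contract; Expr, Leaf, And, Or, FILE_SCHEMA, and OrPushdownSketch are hypothetical names, not Hive or Parquet API, and the real translate() builds FilterPredicate objects rather than strings.

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class OrPushdownSketch {

  sealed interface Expr permits Leaf, And, Or {}
  record Leaf(String column, long value) implements Expr {}  // "column = value"
  record And(List<Expr> children) implements Expr {}
  record Or(List<Expr> children) implements Expr {}

  // Columns that exist in the Parquet file; partition columns do not.
  static final Set<String> FILE_SCHEMA = Set.of("a", "b");

  // Returns a printable filter, or null when the subtree cannot be pushed down.
  static String translate(Expr e) {
    if (e instanceof Leaf leaf) {
      return FILE_SCHEMA.contains(leaf.column())
          ? "eq(" + leaf.column() + ", " + leaf.value() + ")"
          : null;  // missing column: no row-group filter possible for this leaf
    }
    if (e instanceof And and) {
      // AND may drop null children: fewer conjuncts only let MORE rows through.
      List<String> kept = new ArrayList<>();
      for (Expr child : and.children()) {
        String t = translate(child);
        if (t != null) {
          kept.add(t);
        }
      }
      if (kept.isEmpty()) {
        return null;
      }
      return kept.size() == 1 ? kept.get(0) : "and(" + String.join(", ", kept) + ")";
    }
    // OR must abandon the whole disjunction if any child is untranslatable:
    // dropping a branch would reject rows that only that branch matched.
    List<String> kept = new ArrayList<>();
    for (Expr child : ((Or) e).children()) {
      String t = translate(child);
      if (t == null) {
        return null;  // the rule this patch introduces
      }
      kept.add(t);
    }
    return kept.size() == 1 ? kept.get(0) : "or(" + String.join(", ", kept) + ")";
  }

  public static void main(String[] args) {
    // A null conjunct is dropped safely:
    System.out.println(translate(new And(List.of(
        new Leaf("a", 1L), new Leaf("p", 1L)))));    // prints: eq(a, 1)
    // A null disjunct poisons the whole OR (the old code printed "eq(a, 999)"):
    System.out.println(translate(new Or(List.of(
        new Leaf("p", 1L), new Leaf("a", 999L)))));  // prints: null
  }
}

Under the patched rule, (p=1 or a=999) translates to null and is dropped by the enclosing AND, leaving or(eq(a, 1), eq(a, 999)) as asserted in testFilterColumnsThatDoNoExistOnSchemaHighOrder1: a weaker row-group filter than before, but a correct one.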