diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
index fe2094ad2c7669a87978cd0ff1e0c8a5e1d021e2..38139ee1dacd124d1e9121ead8d47ca81a9827db 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
@@ -57,9 +57,9 @@ public FilterPredicate buildPredicate(PredicateLeaf.Operator op, List<Object> literals,
         }
         Object min = literals.get(0);
         Object max = literals.get(1);
-        FilterPredicate lt = not(buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS,
+        FilterPredicate lt = not(buildPredict(PredicateLeaf.Operator.LESS_THAN,
             min, columnName));
-        FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN, max, columnName);
+        FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS, max, columnName);
         result = FilterApi.and(gt, lt);
         return result;
       default:
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
index e92b696a0b648a8b3b075bb83aaed8c3a2c3d797..35d0342d1700b021bdc7140fc3fc13c01ca79ff2 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
@@ -64,7 +64,7 @@ public void testBuilder() throws Exception {
 
     FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected =
-        "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " +
+        "and(and(and(not(eq(x, null)), not(and(lteq(y, 20), not(lt(y, 10))))), not(or(or(eq(z, 1), " +
         "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
     assertEquals(expected, p.toString());
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
index 2be2596bacbdd443635ed00e5874db3d80b2451d..bd1f5e0420e48776363ce285ae8d8d27a26aa8a9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
@@ -66,7 +66,36 @@ public void testFilterFloatColumns() {
 
     FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected =
-        "and(and(not(eq(a, null)), not(and(lt(a, 20.3), not(lteq(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
+        "and(and(not(eq(a, null)), not(and(lteq(a, 20.3), not(lt(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
+    assertEquals(expected, p.toString());
+  }
+
+  @Test
+  public void testFilterBetween() {
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 bCol; }");
+    SearchArgument sarg = SearchArgumentFactory.newBuilder()
+        .between("bCol", PredicateLeaf.Type.LONG, 1L, 5L)
+        .build();
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+    String expected =
+        "and(lteq(bCol, 5), not(lt(bCol, 1)))";
+    assertEquals(expected, p.toString());
+
+    sarg = SearchArgumentFactory.newBuilder()
+        .between("bCol", PredicateLeaf.Type.LONG, 5L, 1L)
+        .build();
+    p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+    expected =
+        "and(lteq(bCol, 1), not(lt(bCol, 5)))";
+    assertEquals(expected, p.toString());
+
+    sarg = SearchArgumentFactory.newBuilder()
+        .between("bCol", PredicateLeaf.Type.LONG, 1L, 1L)
+        .build();
+    p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+    expected =
+        "and(lteq(bCol, 1), not(lt(bCol, 1)))";
     assertEquals(expected, p.toString());
   }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
index a0fa7002559c6e1df1e289367df34d1f959e78c9..29ec89a171b1bde0ffc9dce9e5b1c970b4dee01b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
@@ -1274,8 +1274,8 @@ public void testExpression3() throws Exception {
 
     assertEquals(3, leaves.size());
     String[] conditions = new String[]{
-      "lt(id, 45)",                         /* id between 23 and 45 */
-      "not(lteq(id, 23))",                  /* id between 23 and 45 */
+      "lteq(id, 45)",                       /* id between 23 and 45 */
+      "not(lt(id, 23))",                    /* id between 23 and 45 */
      "eq(first_name, Binary{\"alan\"})",   /* first_name = 'alan' */
      "eq(last_name, Binary{\"smith\"})"    /* 'smith' = last_name */
    };
@@ -1772,7 +1772,7 @@ public void testExpression5() throws Exception {
         " required binary first_name; }");
     FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected =
-        "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))";
+        "and(lteq(first_name, Binary{\"greg\"}), not(lt(first_name, Binary{\"david\"})))";
     assertEquals(p.toString(), expected);
 
     assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType());
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd.q b/ql/src/test/queries/clientpositive/parquet_ppd.q
new file mode 100644
index 0000000000000000000000000000000000000000..56ca96ee61549c98d39aad51ca75d6be054d1df4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd.q
@@ -0,0 +1,20 @@
+CREATE TABLE parquet_tbl(
+  key int,
+  ldate string)
+PARTITIONED BY (
+  lyear string )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';
+
+insert overwrite table parquet_tbl partition (lyear='2016') select
+  1,
+  '2016-02-03' from src limit 1;
+
+set hive.optimize.ppd.storage = true;
+set hive.optimize.ppd = true;
+select * from parquet_tbl where ldate between '2016-02-03' and '2016-02-03';
+drop table parquet_tbl;
diff --git a/ql/src/test/results/clientpositive/parquet_ppd.q.out b/ql/src/test/results/clientpositive/parquet_ppd.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..5f7628e83d68c0915ba9b1d5866bc05f68eb90a7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd.q.out
@@ -0,0 +1,61 @@
+PREHOOK: query: CREATE TABLE parquet_tbl(
+  key int,
+  ldate string)
+PARTITIONED BY (
+  lyear string )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_tbl
+POSTHOOK: query: CREATE TABLE parquet_tbl(
+  key int,
+  ldate string)
+PARTITIONED BY (
+  lyear string )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_tbl
+PREHOOK: query: insert overwrite table parquet_tbl partition (lyear='2016') select
+  1,
+  '2016-02-03' from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@parquet_tbl@lyear=2016
+POSTHOOK: query: insert overwrite table parquet_tbl partition (lyear='2016') select
+  1,
+  '2016-02-03' from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@parquet_tbl@lyear=2016
+POSTHOOK: Lineage: parquet_tbl PARTITION(lyear=2016).key SIMPLE []
+POSTHOOK: Lineage: parquet_tbl PARTITION(lyear=2016).ldate SIMPLE []
+PREHOOK: query: select * from parquet_tbl where ldate between '2016-02-03' and '2016-02-03'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_tbl
+PREHOOK: Input: default@parquet_tbl@lyear=2016
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_tbl where ldate between '2016-02-03' and '2016-02-03'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_tbl
+POSTHOOK: Input: default@parquet_tbl@lyear=2016
+#### A masked pattern was here ####
+1	2016-02-03	2016
+PREHOOK: query: drop table parquet_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_tbl
+PREHOOK: Output: default@parquet_tbl
+POSTHOOK: query: drop table parquet_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_tbl
+POSTHOOK: Output: default@parquet_tbl
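
For reviewers, a minimal standalone sketch of the semantics this patch fixes: `col BETWEEN min AND max` must keep both endpoints, so the leaf builder now emits `lteq(col, max)` for the upper bound and `not(lt(col, min))` for the lower bound, instead of the old exclusive pair. This sketch is illustrative only, not part of the patch: the class name and `main` harness are invented for the example, and it calls Parquet's `filter2` API directly. The `org.apache.parquet` package prefix assumes a recent Parquet release; older bundles shipped these classes under the bare `parquet.` prefix.

```java
// Illustrative sketch only -- not Hive code. Shows the corrected BETWEEN
// translation built with Parquet's FilterApi.
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;

public class BetweenTranslationSketch {

  // "col BETWEEN min AND max" with both endpoints inclusive:
  //   upper bound: col <= max       -> lteq(col, max)
  //   lower bound: NOT (col < min)  -> not(lt(col, min)), i.e. col >= min
  static FilterPredicate between(String column, int min, int max) {
    IntColumn col = FilterApi.intColumn(column);
    return FilterApi.and(
        FilterApi.ltEq(col, max),
        FilterApi.not(FilterApi.lt(col, min)));
  }

  public static void main(String[] args) {
    // Prints "and(lteq(bCol, 5), not(lt(bCol, 1)))", matching the expected
    // string in testFilterBetween above. The old translation,
    // and(lt(col, max), not(lteq(col, min))), excluded both endpoints,
    // which is why a predicate like
    // "ldate between '2016-02-03' and '2016-02-03'" in parquet_ppd.q
    // filtered out the matching row before this fix.
    System.out.println(between("bCol", 1, 5));
  }
}
```

Note that the degenerate cases in testFilterBetween (reversed bounds `5L, 1L` and equal bounds `1L, 1L`) simply document what the builder produces; no reordering of min/max is attempted.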