commit e47f7e606f6ae40cf4bb1adab53455f339b1f3e0
Author: Janaki Lahorani
Date:   Wed Jun 13 15:24:03 2018 -0700

    HIVE-19889: Do not push predicates referencing non-deterministic functions

diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
index af2207ff41e1b91fbd9c6f51a76da9cdb1b92934..efe20d9f6c5f4f646e5409d598faba8224b8cac1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
@@ -69,7 +69,13 @@ private HiveFilterProjectTransposeRule(Class filterClass,
   @Override
   public boolean matches(RelOptRuleCall call) {
     final Filter filterRel = call.rel(0);
-    RexNode condition = filterRel.getCondition();
+
+    // The condition fetched here can reference a UDF that is not deterministic but is defined
+    // in the select list when a view is involved. After the pushdown, the condition will
+    // resolve to the UDF from the select list, so the determinism check here must be based
+    // on the resolved expression. Refer to test case cbo_ppd_non_deterministic.q.
+    RexNode condition = RelOptUtil.pushPastProject(filterRel.getCondition(), call.rel(1));
+
     if (this.onlyDeterministic && !HiveCalciteUtil.isDeterministic(condition)) {
       return false;
     }
diff --git ql/src/test/queries/clientpositive/cbo_ppd_non_deterministic.q ql/src/test/queries/clientpositive/cbo_ppd_non_deterministic.q
new file mode 100644
index 0000000000000000000000000000000000000000..f1a7a63d4aff9e2e55d549ea993a612d9d75001c
--- /dev/null
+++ ql/src/test/queries/clientpositive/cbo_ppd_non_deterministic.q
@@ -0,0 +1,42 @@
+CREATE TABLE `testa`(
+  `col1` string COMMENT '',
+  `col2` string COMMENT '',
+  `col3` string COMMENT '',
+  `col4` string COMMENT '',
+  `col5` string COMMENT '')
+PARTITIONED BY (
+  `part1` string,
+  `part2` string,
+  `part3` string)
+STORED AS AVRO;
+
+insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+set hive.cbo.enable=true;
+SET hive.vectorized.execution.enabled=false;
+
+explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20;
+
+SET hive.vectorized.execution.enabled=true;
+
+explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20;
diff --git ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..8f00aa8e0401471527e33716a66c156e3c334cb5
--- /dev/null
+++ ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: CREATE TABLE `testa`(
+  `col1` string COMMENT '',
+  `col2` string COMMENT '',
+  `col3` string COMMENT '',
+  `col4` string COMMENT '',
+  `col5` string COMMENT '')
+PARTITIONED BY (
+  `part1` string,
+  `part2` string,
+  `part3` string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testa
+POSTHOOK: query: CREATE TABLE `testa`(
+  `col1` string COMMENT '',
+  `col2` string COMMENT '',
+  `col3` string COMMENT '',
+  `col4` string COMMENT '',
+  `col5` string COMMENT '')
+PARTITIONED BY (
+  `part1` string,
+  `part2` string,
+  `part3` string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testa
+PREHOOK: query: insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testa@part1=US/part2=ABC/part3=123
+POSTHOOK: query: insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testa@part1=US/part2=ABC/part3=123
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col1 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col2 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col3 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col4 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col5 SCRIPT []
+PREHOOK: query: insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testa@part1=UK/part2=DEF/part3=123
+POSTHOOK: query: insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testa@part1=UK/part2=DEF/part3=123
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col1 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col2 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col3 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col4 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col5 SCRIPT []
+PREHOOK: query: insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testa@part1=US/part2=DEF/part3=200
+POSTHOOK: query: insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testa@part1=US/part2=DEF/part3=200
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col1 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col2 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col3 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col4 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col5 SCRIPT []
+PREHOOK: query: insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testa@part1=CA/part2=ABC/part3=300
+POSTHOOK: query: insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testa@part1=CA/part2=ABC/part3=300
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col1 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col2 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col3 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col4 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col5 SCRIPT []
+PREHOOK: query: explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: testa
+            Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rand() (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+              Filter Operator
+                predicate: ((_col0 <= 0.5D) and (_col0 > 0.25D)) (type: boolean)
+                Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: 'CA' (type: string), _col0 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 20
+                    Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: testa
+            Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rand() (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+              Filter Operator
+                predicate: ((_col0 <= 0.5D) and (_col0 > 0.25D)) (type: boolean)
+                Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: 'CA' (type: string), _col0 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 20
+                    Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
diff --git ql/src/test/results/clientpositive/ppd_udf_col.q.out ql/src/test/results/clientpositive/ppd_udf_col.q.out
index 97ca3835930e050d517361f634b3f2e4a7d3842f..ee5d300ba1f245869213cc395d196be7b0c839db 100644
--- ql/src/test/results/clientpositive/ppd_udf_col.q.out
+++ ql/src/test/results/clientpositive/ppd_udf_col.q.out
@@ -20,19 +20,22 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((UDFToDouble(key) = 100.0D) and (rand() <= 0.1D)) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              predicate: (UDFToDouble(key) = 100.0D) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), rand() (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col1 <= 0.1D) (type: boolean)
                   Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
 
   Stage: Stage-0
@@ -67,18 +70,18 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((UDFToDouble(key) = 100.0D) and (rand() <= 0.1D) and (rand() > 0.1D)) (type: boolean)
-              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              predicate: false (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), rand() (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -199,19 +202,22 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((UDFToDouble(key) = 100.0D) and (rand() <= 0.1D)) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              predicate: (UDFToDouble(key) = 100.0D) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), rand() (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col1 <= 0.1D) (type: boolean)
                   Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
 
   Stage: Stage-0
@@ -246,18 +252,18 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((UDFToDouble(key) = 100.0D) and (rand() <= 0.1D) and (rand() > 0.1D)) (type: boolean)
-              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              predicate: false (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), rand() (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                     table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/union_offcbo.q.out ql/src/test/results/clientpositive/union_offcbo.q.out
index a723f00f7fcab3fc9146e02bf41c85b6410335eb..84800434f596fae0d0c75c2dce9dd575b811513b 100644
--- ql/src/test/results/clientpositive/union_offcbo.q.out
+++ ql/src/test/results/clientpositive/union_offcbo.q.out
@@ -591,18 +591,21 @@ STAGE PLANS:
             alias: ttest1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: ((ts1 = '2015-11-20') and reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) is not null) (type: boolean)
+              predicate: (ts1 = '2015-11-20') (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) (type: string), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col1 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col1 (type: string)
           TableScan
             alias: ttest2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -681,23 +684,26 @@ STAGE PLANS:
             alias: ttest1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: ((ts1 = '2015-11-20') and reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) is not null) (type: boolean)
+              predicate: (ts1 = '2015-11-20') (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: id1 (type: bigint), sts (type: string), at1 (type: bigint), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) (type: string), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col3 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col3 (type: string)
+                Filter Operator
+                  predicate: _col3 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), _col4 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col3 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col3 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), _col4 (type: string)
          TableScan
            alias: ttest2
            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Filter Operator
-              predicate: '2015-11-20' BETWEEN dt1 AND dt2 (type: boolean)
+              predicate: ('2015-11-20' BETWEEN dt1 AND dt2 and khash is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: ts1 (type: string), khash (type: string), rhash (type: string)
@@ -1630,18 +1636,21 @@ STAGE PLANS:
             alias: ttest1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: ((ts1 = '2015-11-20') and reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) is not null) (type: boolean)
+              predicate: (ts1 = '2015-11-20') (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) (type: string), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col1 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col1 (type: string)
           TableScan
             alias: ttest2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1720,23 +1729,26 @@ STAGE PLANS:
             alias: ttest1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: ((ts1 = '2015-11-20') and reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) is not null) (type: boolean)
+              predicate: (ts1 = '2015-11-20') (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: id1 (type: bigint), sts (type: string), at1 (type: bigint), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) (type: string), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col3 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col3 (type: string)
+                Filter Operator
+                  predicate: _col3 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), _col4 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col3 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col3 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), _col4 (type: string)
           TableScan
             alias: ttest2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: '2015-11-20' BETWEEN dt1 AND dt2 (type: boolean)
+              predicate: ('2015-11-20' BETWEEN dt1 AND dt2 and khash is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: ts1 (type: string), khash (type: string), rhash (type: string)
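
Note on the rationale (editorial illustration, not part of the commit): in cbo_ppd_non_deterministic.q, randum123 is rand() evaluated once in the inner select list, and both outer predicates (randum123 <= 0.5 and s.randum123 > 0.25) must test that single value. If HiveFilterProjectTransposeRule pushed those predicates below the project, rand() would be re-evaluated for each predicate and the query's semantics would change. The following standalone Java sketch (illustrative only; it is not Hive or Calcite code, and the class name, seed, and row count are invented) simulates the two plans:

import java.util.Random;

public class NonDeterministicPushdownDemo {
  public static void main(String[] args) {
    Random rnd = new Random(42); // fixed seed so the run is repeatable
    int rows = 1_000_000;
    int correctPlan = 0;  // rand() evaluated once per row, above the project
    int pushedPlan = 0;   // rand() re-evaluated per predicate after pushdown

    for (int i = 0; i < rows; i++) {
      // Correct plan: both predicates test the same rand() value.
      double randum123 = rnd.nextDouble();
      if (randum123 <= 0.5 && randum123 > 0.25) {
        correctPlan++;
      }
      // Unsound plan: each pushed predicate draws its own rand() value.
      if (rnd.nextDouble() <= 0.5 && rnd.nextDouble() > 0.25) {
        pushedPlan++;
      }
    }
    // Expect roughly 25.0% for the correct plan vs roughly 37.5% after pushdown.
    System.out.printf("correct: %.2f%%, pushed down: %.2f%%%n",
        100.0 * correctPlan / rows, 100.0 * pushedPlan / rows);
  }
}

The selectivities differ (0.25 versus 0.5 * 0.75 = 0.375), which is why matches() tests determinism on the condition as it reads after RelOptUtil.pushPastProject resolves it against the project's expressions, rather than on the raw filter condition.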