diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java index a5dcffb..73a67a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java @@ -106,6 +106,26 @@ private static RexNode simplifyNot(RexBuilder rexBuilder, RexCall call) { return simplify(rexBuilder, rexBuilder.makeCall(op(negateKind2), ((RexCall) a).getOperands())); } + if (a.getKind() == SqlKind.AND) { + // NOT distributivity for AND + final List<RexNode> newOperands = new ArrayList<>(); + for (RexNode operand : ((RexCall) a).getOperands()) { + newOperands.add(simplify(rexBuilder, + rexBuilder.makeCall(SqlStdOperatorTable.NOT, operand))); + } + return simplify(rexBuilder, + rexBuilder.makeCall(SqlStdOperatorTable.OR, newOperands)); + } + if (a.getKind() == SqlKind.OR) { + // NOT distributivity for OR + final List<RexNode> newOperands = new ArrayList<>(); + for (RexNode operand : ((RexCall) a).getOperands()) { + newOperands.add(simplify(rexBuilder, + rexBuilder.makeCall(SqlStdOperatorTable.NOT, operand))); + } + return simplify(rexBuilder, + rexBuilder.makeCall(SqlStdOperatorTable.AND, newOperands)); + } return call; } diff --git ql/src/test/results/clientpositive/folder_predicate.q.out ql/src/test/results/clientpositive/folder_predicate.q.out index 48a4889..7fcc172 100644 --- ql/src/test/results/clientpositive/folder_predicate.q.out +++ ql/src/test/results/clientpositive/folder_predicate.q.out @@ -37,15 +37,15 @@ STAGE PLANS: alias: predicate_fold_tb Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not (value is not null and (value = 3))) (type: boolean) - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: (value is null or (value <> 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: value (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -88,15 +88,15 @@ STAGE PLANS: alias: predicate_fold_tb Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not (value is not null and (value >= 3))) (type: boolean) - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (value is null or (value < 3)) (type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,15 +137,15 @@ STAGE PLANS: alias: predicate_fold_tb Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not (value is not null and (value <= 3))) (type: boolean) - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (value is null or (value > 3)) (type: boolean) + Statistics: Num rows: 5 
Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -186,15 +186,15 @@ STAGE PLANS: alias: predicate_fold_tb Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not (value is not null and (value > 3))) (type: boolean) - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (value is null or (value <= 3)) (type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -236,15 +236,15 @@ STAGE PLANS: alias: predicate_fold_tb Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not (value is not null and (value < 3))) (type: boolean) - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (value is null or (value >= 3)) 
(type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -286,15 +286,15 @@ STAGE PLANS: alias: predicate_fold_tb Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not (value is not null and (value <> 3))) (type: boolean) - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + predicate: (value is null or (value = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -334,7 +334,7 @@ STAGE PLANS: alias: predicate_fold_tb Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not (value is not null and (value > 1) and (value <= 3))) (type: boolean) + predicate: (value is null or (value <= 1) or (value > 3)) (type: 
boolean) Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: int) diff --git ql/src/test/results/clientpositive/rand_partitionpruner3.q.out ql/src/test/results/clientpositive/rand_partitionpruner3.q.out index 6377e95..9e2878f 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner3.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner3.q.out @@ -21,7 +21,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -65,12 +65,12 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((rand(1) < 0.1) and (not ((UDFToDouble(key) > 50.0) or (UDFToDouble(key) < 10.0)))) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + predicate: ((rand(1) < 0.1) and (UDFToDouble(key) <= 50.0) and (UDFToDouble(key) >= 10.0)) (type: boolean) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' @@ -109,7 +109,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value 
columns.comments 'default','default' @@ -153,12 +153,12 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (not ((UDFToDouble(key) > 50.0) or (UDFToDouble(key) < 10.0))) (type: boolean) - Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) <= 50.0) and (UDFToDouble(key) >= 10.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' diff --git ql/src/test/results/clientpositive/union_offcbo.q.out ql/src/test/results/clientpositive/union_offcbo.q.out index 38aaaa5..2c4c0de 100644 --- ql/src/test/results/clientpositive/union_offcbo.q.out +++ ql/src/test/results/clientpositive/union_offcbo.q.out @@ -629,7 +629,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END <> 'RET') and (not ((NVL(_col0,-1) = NVL(_col7,-1)) and (NVL(_col1,-1) = NVL(_col8,-1))))) (type: boolean) + predicate: (((NVL(_col0,-1) <> NVL(_col7,-1)) or (NVL(_col1,-1) <> NVL(_col8,-1))) and (CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 
is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END <> 'RET')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col2 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col4 (type: string), _col7 (type: string), _col8 (type: string), CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END (type: string) @@ -719,7 +719,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END <> 'RET') and (not ((NVL(_col3,-1) = NVL(_col6,-1)) and (NVL(_col4,-1) = NVL(_col7,-1))))) (type: boolean) + predicate: (((NVL(_col3,-1) <> NVL(_col6,-1)) or (NVL(_col4,-1) <> NVL(_col7,-1))) and (CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END <> 'RET')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), '2099-12-31' (type: string), _col3 (type: string), _col4 (type: string), CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END (type: string) @@ -1652,7 
+1652,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END <> 'RET') and (not ((NVL(_col0,-1) = NVL(_col7,-1)) and (NVL(_col1,-1) = NVL(_col8,-1))))) (type: boolean) + predicate: (((NVL(_col0,-1) <> NVL(_col7,-1)) or (NVL(_col1,-1) <> NVL(_col8,-1))) and (CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END <> 'RET')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col2 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col4 (type: string), _col7 (type: string), _col8 (type: string), CASE WHEN ((_col7 is not null and _col0 is null and (_col3 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col7 is not null and _col0 is null and (_col3 <= '2016-02-05'))) THEN ('RET') WHEN (((_col7 = _col0) and (_col8 <> _col1))) THEN ('A_INS') ELSE ('NA') END (type: string) @@ -1742,7 +1742,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END <> 'RET') and (not ((NVL(_col3,-1) = NVL(_col6,-1)) and (NVL(_col4,-1) = NVL(_col7,-1))))) (type: boolean) + predicate: 
(((NVL(_col3,-1) <> NVL(_col6,-1)) or (NVL(_col4,-1) <> NVL(_col7,-1))) and (CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END <> 'RET')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), '2099-12-31' (type: string), _col3 (type: string), _col4 (type: string), CASE WHEN ((_col6 is not null and _col3 is null and (_col5 <= '2015-11-20'))) THEN ('DEL') WHEN (((_col6 is null and _col3 is not null) or ((_col6 = _col3) and (_col7 <> _col4)))) THEN ('INS') ELSE ('NA') END (type: string)