diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 20d0304..8c11e6b 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -526,8 +526,20 @@ protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass // push down projections. ColumnProjectionUtils.appendReadColumns( jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns()); - // push down filters - pushFilters(jobConf, ts); + } + } + + // HIVE-10792: only push filters down when there is exactly one alias. + // Consider the following query: + // select * from test_orc t1 + // left outer join test_orc t2 on (t1.c0=t2.c0 and t2.c1=0); + // In the map phase, "test_orc" is scanned only once by OrcInputFormat, and + // MapOperator forwards every row to both child TableScanOperators (t1, t2). + // If "c1=0" were pushed down to OrcInputFormat, t1 would not receive all rows. 
+ if (aliases.size() == 1) { + Operator op = mrwork.getAliasToWork().get(aliases.get(0)); + if (op instanceof TableScanOperator) { + pushFilters(jobConf, (TableScanOperator) op); } } } diff --git a/ql/src/test/queries/clientpositive/orc_ppd_disable1.q b/ql/src/test/queries/clientpositive/orc_ppd_disable1.q new file mode 100644 index 0000000..56e743f --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_ppd_disable1.q @@ -0,0 +1,14 @@ +set hive.optimize.index.filter=true; + +create table test_orc_ppd_disable (c0 int, c1 int) stored as orc; + +insert overwrite table test_orc_ppd_disable select 0, 1 from src limit 1; + +explain +select * from test_orc_ppd_disable t1 +union all +select * from test_orc_ppd_disable t2 where t2.c0 = 1; + +select * from test_orc_ppd_disable t1 +union all +select * from test_orc_ppd_disable t2 where t2.c0 = 1; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/orc_ppd_disable2.q b/ql/src/test/queries/clientpositive/orc_ppd_disable2.q new file mode 100644 index 0000000..1173af7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_ppd_disable2.q @@ -0,0 +1,13 @@ +set hive.optimize.index.filter=true; +set hive.auto.convert.join=false; -- force common join + +create table test_orc_ppd_disable (c0 int, c1 int) stored as orc; + +insert overwrite table test_orc_ppd_disable select 0, 1 from src limit 1; + +explain +select * from test_orc_ppd_disable t1 +left outer join test_orc_ppd_disable t2 on (t1.c0=t2.c0 and t2.c1=0); + +select * from test_orc_ppd_disable t1 +left outer join test_orc_ppd_disable t2 on (t1.c0=t2.c0 and t2.c1=0); \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/orc_ppd_disable1.q.out b/ql/src/test/results/clientpositive/orc_ppd_disable1.q.out new file mode 100644 index 0000000..0eff966 --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_ppd_disable1.q.out @@ -0,0 +1,92 @@ +PREHOOK: query: create table test_orc_ppd_disable (c0 int, c1 int) stored as orc 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_orc_ppd_disable +POSTHOOK: query: create table test_orc_ppd_disable (c0 int, c1 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_orc_ppd_disable +PREHOOK: query: insert overwrite table test_orc_ppd_disable select 0, 1 from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_orc_ppd_disable +POSTHOOK: query: insert overwrite table test_orc_ppd_disable select 0, 1 from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_orc_ppd_disable +POSTHOOK: Lineage: test_orc_ppd_disable.c0 SIMPLE [] +POSTHOOK: Lineage: test_orc_ppd_disable.c1 SIMPLE [] +PREHOOK: query: explain +select * from test_orc_ppd_disable t1 +union all +select * from test_orc_ppd_disable t2 where t2.c0 = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from test_orc_ppd_disable t1 +union all +select * from test_orc_ppd_disable t2 where t2.c0 = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c0 (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: t1 + filterExpr: (c0 = 1) 
(type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c0 = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test_orc_ppd_disable t1 +union all +select * from test_orc_ppd_disable t2 where t2.c0 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_orc_ppd_disable +#### A masked pattern was here #### +POSTHOOK: query: select * from test_orc_ppd_disable t1 +union all +select * from test_orc_ppd_disable t2 where t2.c0 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_orc_ppd_disable +#### A masked pattern was here #### +0 1 diff --git a/ql/src/test/results/clientpositive/orc_ppd_disable2.q.out b/ql/src/test/results/clientpositive/orc_ppd_disable2.q.out new file mode 100644 index 0000000..520955e --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_ppd_disable2.q.out @@ -0,0 +1,98 @@ +PREHOOK: query: -- force common join + +create table test_orc_ppd_disable (c0 int, c1 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_orc_ppd_disable +POSTHOOK: query: -- force common join + +create table test_orc_ppd_disable (c0 int, c1 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: 
Output: database:default +POSTHOOK: Output: default@test_orc_ppd_disable +PREHOOK: query: insert overwrite table test_orc_ppd_disable select 0, 1 from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_orc_ppd_disable +POSTHOOK: query: insert overwrite table test_orc_ppd_disable select 0, 1 from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_orc_ppd_disable +POSTHOOK: Lineage: test_orc_ppd_disable.c0 SIMPLE [] +POSTHOOK: Lineage: test_orc_ppd_disable.c1 SIMPLE [] +PREHOOK: query: explain +select * from test_orc_ppd_disable t1 +left outer join test_orc_ppd_disable t2 on (t1.c0=t2.c0 and t2.c1=0) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from test_orc_ppd_disable t1 +left outer join test_orc_ppd_disable t2 on (t1.c0=t2.c0 and t2.c1=0) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c0 (type: int) + sort order: + + Map-reduce partition columns: c0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: int) + TableScan + alias: t2 + filterExpr: (c1 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c1 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c0 (type: int) + sort order: + + Map-reduce partition columns: c0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 c0 (type: int) + 1 c0 
(type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test_orc_ppd_disable t1 +left outer join test_orc_ppd_disable t2 on (t1.c0=t2.c0 and t2.c1=0) +PREHOOK: type: QUERY +PREHOOK: Input: default@test_orc_ppd_disable +#### A masked pattern was here #### +POSTHOOK: query: select * from test_orc_ppd_disable t1 +left outer join test_orc_ppd_disable t2 on (t1.c0=t2.c0 and t2.c1=0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_orc_ppd_disable +#### A masked pattern was here #### +0 1 NULL NULL