diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index f435677..db8736f 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -492,6 +492,7 @@ spark.query.files=add_part_multiple.q, \ auto_join9.q, \ auto_join_filters.q, \ auto_join_nulls.q, \ + auto_join_ppd.q, \ auto_join_reordering_values.q, \ auto_join_stats.q, \ auto_join_stats2.q, \ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index a6d5c62..2710544 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -137,7 +137,8 @@ public void initialize(HiveConf hiveConf) { transformations.add(new ReduceSinkDeDuplication()); } transformations.add(new NonBlockingOpDeDupProc()); - if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)) { + if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER) && + !isSparkExecEngine) { transformations.add(new IdentityProjectRemover()); } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) { diff --git ql/src/test/queries/clientpositive/auto_join_ppd.q ql/src/test/queries/clientpositive/auto_join_ppd.q new file mode 100644 index 0000000..1e88f1e --- /dev/null +++ ql/src/test/queries/clientpositive/auto_join_ppd.q @@ -0,0 +1,15 @@ +set hive.auto.convert.join=true; +set hive.optimize.ppd=true; + +drop table if exists test_tbl ; + +create table test_tbl (id string,name string); + +insert into table test_tbl +select * from src; + +select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join (select * from test_tbl sort by id) t3 on (t2.id=t3.id ) +where t2.name='val_105' and t3.id='105'; \ No newline at end of file diff --git ql/src/test/results/clientpositive/auto_join_ppd.q.out ql/src/test/results/clientpositive/auto_join_ppd.q.out new file mode 100644 index 0000000..081a0b3 --- /dev/null +++ ql/src/test/results/clientpositive/auto_join_ppd.q.out @@ -0,0 +1,40 @@ +PREHOOK: query: drop table if exists test_tbl +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists test_tbl +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table test_tbl (id string,name string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_tbl +POSTHOOK: query: create table test_tbl (id string,name string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_tbl +PREHOOK: query: insert into table test_tbl +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tbl +POSTHOOK: query: insert into table test_tbl +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tbl +POSTHOOK: Lineage: test_tbl.id SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tbl.name SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join (select * from test_tbl sort by id) t3 on (t2.id=t3.id ) +where t2.name='val_105' and t3.id='105' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tbl +#### A masked pattern was here #### +POSTHOOK: query: select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join (select * from test_tbl sort by id) t3 on (t2.id=t3.id ) +where t2.name='val_105' and t3.id='105' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tbl +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/auto_join_ppd.q.out ql/src/test/results/clientpositive/spark/auto_join_ppd.q.out new file mode 100644 index 0000000..8604db2 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join_ppd.q.out @@ -0,0 +1,41 @@ +PREHOOK: query: drop table if exists test_tbl +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists test_tbl +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table test_tbl (id string,name string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_tbl +POSTHOOK: query: create table test_tbl (id string,name string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_tbl +PREHOOK: query: insert into table test_tbl +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tbl +POSTHOOK: query: insert into table test_tbl +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tbl +POSTHOOK: Lineage: test_tbl.id SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tbl.name SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join (select * from test_tbl sort by id) t3 on (t2.id=t3.id ) +where t2.name='val_105' and t3.id='105' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tbl +#### A masked pattern was here #### +POSTHOOK: query: select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join (select * from test_tbl sort by id) t3 on (t2.id=t3.id ) +where t2.name='val_105' and t3.id='105' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tbl +#### A masked pattern was here #### +105 val_105