diff --git itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q new file mode 100644 index 0000000..44360b0 --- /dev/null +++ itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q @@ -0,0 +1,42 @@ +-- Dynamic-partition INSERT OVERWRITE into an external table located under test.blobstore.path.unique, run once with hive.blobstore.optimizations.enabled=true and once with it set to false. Both passes must return the same five rows. +-- Fixture: three rows go to product_id=10000 and one row each to 10001 and 10002, all with reported_date=2017-04-10. +-- NOTE(review): the test name suggests this layout is meant to exercise both the merge path and the move-only path in one insert. Confirm against ConditionalResolverMergeFiles. + +SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +SET hive.blobstore.optimizations.enabled=true; +SET hive.exec.dynamic.partition.mode=nonstrict; +SET mapreduce.input.fileinputformat.split.maxsize=10; +SET hive.merge.mapfiles=true; +SET hive.optimize.sort.dynamic.partition=false; + +CREATE TABLE tmp_table_merge_move (id string, name string, dt string, pid int); + +INSERT INTO tmp_table_merge_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10002); + +CREATE EXTERNAL TABLE s3_table_merge_move (user_id string, event_name string) PARTITIONED BY (reported_date string, product_id int) LOCATION '${hiveconf:test.blobstore.path.unique}/s3_table_merge_move/'; + +INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge_move t; + +select * from s3_table_merge_move order by user_id; + +-- Second pass: the identical insert with the blobstore optimizations disabled. The select below must return the same rows as the first pass. +SET hive.blobstore.optimizations.enabled=false; + +INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge_move t; + +select * from s3_table_merge_move order by user_id; + +DROP TABLE s3_table_merge_move; +DROP TABLE tmp_table_merge_move; diff --git 
itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q new file mode 100644 index 0000000..25562d9 --- /dev/null +++ itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q @@ -0,0 +1,42 @@ +-- Dynamic-partition INSERT OVERWRITE into an external table located under test.blobstore.path.unique, run once with hive.blobstore.optimizations.enabled=true and once with it set to false. Both passes must return the same five rows. +-- Fixture: three rows go to product_id=10000 and two rows to product_id=10001, all with reported_date=2017-04-10, so every partition receives more than one row. +-- NOTE(review): the test name suggests every partition is expected to take the merge path. Confirm against ConditionalResolverMergeFiles. + +SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +SET hive.blobstore.optimizations.enabled=true; +SET hive.exec.dynamic.partition.mode=nonstrict; +SET mapreduce.input.fileinputformat.split.maxsize=10; +SET hive.merge.mapfiles=true; +SET hive.optimize.sort.dynamic.partition=false; + +CREATE TABLE tmp_table_merge (id string, name string, dt string, pid int); + +INSERT INTO tmp_table_merge values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10001); + +CREATE EXTERNAL TABLE s3_table_merge (user_id string, event_name string) PARTITIONED BY (reported_date string, product_id int) LOCATION '${hiveconf:test.blobstore.path.unique}/s3_table_merge/'; + +INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge t; + +select * from s3_table_merge order by user_id; + +-- Second pass: the identical insert with the blobstore optimizations disabled. The select below must return the same rows as the first pass. +SET hive.blobstore.optimizations.enabled=false; + +INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge t; + +select * from s3_table_merge order by user_id; + +DROP TABLE s3_table_merge; +DROP TABLE tmp_table_merge; diff --git itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_move_only.q 
itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_move_only.q new file mode 100644 index 0000000..cb1a32b --- /dev/null +++ itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_move_only.q @@ -0,0 +1,42 @@ +-- Dynamic-partition INSERT OVERWRITE into an external table located under test.blobstore.path.unique, run once with hive.blobstore.optimizations.enabled=true and once with it set to false. Both passes must return the same five rows. +-- Fixture: five rows spread over five distinct (reported_date, product_id) partitions, one row per partition. +-- NOTE(review): the test name suggests no partition is expected to need a merge, leaving only the move path. Confirm against ConditionalResolverMergeFiles. + +SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +SET hive.blobstore.optimizations.enabled=true; +SET hive.exec.dynamic.partition.mode=nonstrict; +SET mapreduce.input.fileinputformat.split.maxsize=10; +SET hive.merge.mapfiles=true; +SET hive.optimize.sort.dynamic.partition=false; + +CREATE TABLE tmp_table_move (id string, name string, dt string, pid int); + +INSERT INTO tmp_table_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10001), ('u3','name3','2017-04-10',10002), ('u4','name4','2017-04-12',10001), ('u5','name5','2017-04-12',10002); + +CREATE EXTERNAL TABLE s3_table_move (user_id string, event_name string) PARTITIONED BY (reported_date string, product_id int) LOCATION '${hiveconf:test.blobstore.path.unique}/s3_table_move/'; + +INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_move t; + +select * from s3_table_move order by user_id; + +-- Second pass: the identical insert with the blobstore optimizations disabled. The select below must return the same rows as the first pass. +SET hive.blobstore.optimizations.enabled=false; + +INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_move t; + +select * from s3_table_move order by user_id; + +DROP TABLE s3_table_move; +DROP TABLE tmp_table_move; diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q.out new file mode 100644 index 0000000..bfebad6 --- /dev/null +++ 
itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q.out @@ -0,0 +1,138 @@ +PREHOOK: query: CREATE TABLE tmp_table_merge_move (id string, name string, dt string, pid int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_table_merge_move +POSTHOOK: query: CREATE TABLE tmp_table_merge_move (id string, name string, dt string, pid int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_table_merge_move +PREHOOK: query: INSERT INTO tmp_table_merge_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10002) +PREHOOK: type: QUERY +PREHOOK: Output: default@tmp_table_merge_move +POSTHOOK: query: INSERT INTO tmp_table_merge_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10002) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tmp_table_merge_move +POSTHOOK: Lineage: tmp_table_merge_move.dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_merge_move.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_merge_move.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_merge_move.pid EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: ### test.blobstore.path ###/s3_table_merge_move +PREHOOK: Output: database:default +PREHOOK: Output: default@s3_table_merge_move +#### A 
masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: ### test.blobstore.path ###/s3_table_merge_move +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s3_table_merge_move +PREHOOK: query: INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge_move t +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_table_merge_move +PREHOOK: Output: default@s3_table_merge_move +POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge_move t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_table_merge_move +POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002 +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10000).event_name SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10000).user_id SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10001).event_name SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10001).user_id SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10002).event_name SIMPLE 
[(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10002).user_id SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ] +PREHOOK: query: select * from s3_table_merge_move order by user_id +PREHOOK: type: QUERY +PREHOOK: Input: default@s3_table_merge_move +PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000 +PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001 +PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002 +#### A masked pattern was here #### +POSTHOOK: query: select * from s3_table_merge_move order by user_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s3_table_merge_move +POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002 +#### A masked pattern was here #### +u1 name1 2017-04-10 10000 +u2 name2 2017-04-10 10000 +u3 name3 2017-04-10 10000 +u4 name4 2017-04-10 10001 +u5 name5 2017-04-10 10002 +PREHOOK: query: INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge_move t +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_table_merge_move +PREHOOK: Output: default@s3_table_merge_move +POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge_move t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_table_merge_move +POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000 
+POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002 +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10000).event_name SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10000).user_id SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10001).event_name SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10001).user_id SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10002).event_name SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10002).user_id SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ] +PREHOOK: query: select * from s3_table_merge_move order by user_id +PREHOOK: type: QUERY +PREHOOK: Input: default@s3_table_merge_move +PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000 +PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001 +PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002 +#### A masked pattern was here #### +POSTHOOK: query: select * from s3_table_merge_move order by user_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s3_table_merge_move +POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Input: 
default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002 +#### A masked pattern was here #### +u1 name1 2017-04-10 10000 +u2 name2 2017-04-10 10000 +u3 name3 2017-04-10 10000 +u4 name4 2017-04-10 10001 +u5 name5 2017-04-10 10002 +PREHOOK: query: DROP TABLE s3_table_merge_move +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@s3_table_merge_move +PREHOOK: Output: default@s3_table_merge_move +POSTHOOK: query: DROP TABLE s3_table_merge_move +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@s3_table_merge_move +POSTHOOK: Output: default@s3_table_merge_move +PREHOOK: query: DROP TABLE tmp_table_merge_move +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tmp_table_merge_move +PREHOOK: Output: default@tmp_table_merge_move +POSTHOOK: query: DROP TABLE tmp_table_merge_move +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tmp_table_merge_move +POSTHOOK: Output: default@tmp_table_merge_move diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q.out new file mode 100644 index 0000000..1bffae3 --- /dev/null +++ itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q.out @@ -0,0 +1,128 @@ +PREHOOK: query: CREATE TABLE tmp_table_merge (id string, name string, dt string, pid int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_table_merge +POSTHOOK: query: CREATE TABLE tmp_table_merge (id string, name string, dt string, pid int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_table_merge +PREHOOK: query: INSERT INTO tmp_table_merge values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), 
('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10001) +PREHOOK: type: QUERY +PREHOOK: Output: default@tmp_table_merge +POSTHOOK: query: INSERT INTO tmp_table_merge values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10001) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tmp_table_merge +POSTHOOK: Lineage: tmp_table_merge.dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_merge.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_merge.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_merge.pid EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: ### test.blobstore.path ###/s3_table_merge +PREHOOK: Output: database:default +PREHOOK: Output: default@s3_table_merge +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: ### test.blobstore.path ###/s3_table_merge +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s3_table_merge +PREHOOK: query: INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge t +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_table_merge +PREHOOK: Output: default@s3_table_merge +POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge t +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@tmp_table_merge +POSTHOOK: Output: default@s3_table_merge@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Output: default@s3_table_merge@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10000).event_name SIMPLE [(tmp_table_merge)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10000).user_id SIMPLE [(tmp_table_merge)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10001).event_name SIMPLE [(tmp_table_merge)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10001).user_id SIMPLE [(tmp_table_merge)t.FieldSchema(name:id, type:string, comment:null), ] +PREHOOK: query: select * from s3_table_merge order by user_id +PREHOOK: type: QUERY +PREHOOK: Input: default@s3_table_merge +PREHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10000 +PREHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10001 +#### A masked pattern was here #### +POSTHOOK: query: select * from s3_table_merge order by user_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s3_table_merge +POSTHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10001 +#### A masked pattern was here #### +u1 name1 2017-04-10 10000 +u2 name2 2017-04-10 10000 +u3 name3 2017-04-10 10000 +u4 name4 2017-04-10 10001 +u5 name5 2017-04-10 10001 +PREHOOK: query: INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge t +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_table_merge 
+PREHOOK: Output: default@s3_table_merge +POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_merge t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_table_merge +POSTHOOK: Output: default@s3_table_merge@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Output: default@s3_table_merge@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10000).event_name SIMPLE [(tmp_table_merge)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10000).user_id SIMPLE [(tmp_table_merge)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10001).event_name SIMPLE [(tmp_table_merge)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10001).user_id SIMPLE [(tmp_table_merge)t.FieldSchema(name:id, type:string, comment:null), ] +PREHOOK: query: select * from s3_table_merge order by user_id +PREHOOK: type: QUERY +PREHOOK: Input: default@s3_table_merge +PREHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10000 +PREHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10001 +#### A masked pattern was here #### +POSTHOOK: query: select * from s3_table_merge order by user_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s3_table_merge +POSTHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10001 +#### A masked pattern was here #### +u1 name1 2017-04-10 10000 +u2 name2 2017-04-10 10000 +u3 name3 2017-04-10 10000 +u4 name4 2017-04-10 10001 +u5 name5 2017-04-10 10001 +PREHOOK: 
query: DROP TABLE s3_table_merge +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@s3_table_merge +PREHOOK: Output: default@s3_table_merge +POSTHOOK: query: DROP TABLE s3_table_merge +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@s3_table_merge +POSTHOOK: Output: default@s3_table_merge +PREHOOK: query: DROP TABLE tmp_table_merge +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tmp_table_merge +PREHOOK: Output: default@tmp_table_merge +POSTHOOK: query: DROP TABLE tmp_table_merge +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tmp_table_merge +POSTHOOK: Output: default@tmp_table_merge diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_move_only.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_move_only.q.out new file mode 100644 index 0000000..530c036 --- /dev/null +++ itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_move_only.q.out @@ -0,0 +1,148 @@ +PREHOOK: query: CREATE TABLE tmp_table_move (id string, name string, dt string, pid int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_table_move +POSTHOOK: query: CREATE TABLE tmp_table_move (id string, name string, dt string, pid int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_table_move +PREHOOK: query: INSERT INTO tmp_table_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10001), ('u3','name3','2017-04-10',10002), ('u4','name4','2017-04-12',10001), ('u5','name5','2017-04-12',10002) +PREHOOK: type: QUERY +PREHOOK: Output: default@tmp_table_move +POSTHOOK: query: INSERT INTO tmp_table_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10001), ('u3','name3','2017-04-10',10002), ('u4','name4','2017-04-12',10001), ('u5','name5','2017-04-12',10002) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tmp_table_move +POSTHOOK: 
Lineage: tmp_table_move.dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_move.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_move.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tmp_table_move.pid EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: ### test.blobstore.path ###/s3_table_move +PREHOOK: Output: database:default +PREHOOK: Output: default@s3_table_move +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: ### test.blobstore.path ###/s3_table_move +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s3_table_move +PREHOOK: query: INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_move t +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_table_move +PREHOOK: Output: default@s3_table_move +POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_move t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_table_move +POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10002 +POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-12/product_id=10001 +POSTHOOK: Output: 
default@s3_table_move@reported_date=2017-04-12/product_id=10002 +PREHOOK: query: select * from s3_table_move order by user_id +PREHOOK: type: QUERY +PREHOOK: Input: default@s3_table_move +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10000 +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10001 +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10002 +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10001 +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10002 +#### A masked pattern was here #### +POSTHOOK: query: select * from s3_table_move order by user_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s3_table_move +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10002 +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10001 +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10002 +#### A masked pattern was here #### +u1 name1 2017-04-10 10000 +u2 name2 2017-04-10 10001 +u3 name3 2017-04-10 10002 +u4 name4 2017-04-12 10001 +u5 name5 2017-04-12 10002 +PREHOOK: query: INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_move t +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_table_move +PREHOOK: Output: default@s3_table_move +POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id) +SELECT + t.id as user_id, + t.name as event_name, + t.dt as reported_date, + t.pid as product_id +FROM tmp_table_move t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_table_move +POSTHOOK: Output: 
default@s3_table_move@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10002 +POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-12/product_id=10001 +POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-12/product_id=10002 +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10000).event_name SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10000).user_id SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10001).event_name SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10001).user_id SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10002).event_name SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10002).user_id SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-12,product_id=10001).event_name SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-12,product_id=10001).user_id SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ] +POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-12,product_id=10002).event_name SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: 
s3_table_move PARTITION(reported_date=2017-04-12,product_id=10002).user_id SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ] +PREHOOK: query: select * from s3_table_move order by user_id +PREHOOK: type: QUERY +PREHOOK: Input: default@s3_table_move +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10000 +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10001 +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10002 +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10001 +PREHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10002 +#### A masked pattern was here #### +POSTHOOK: query: select * from s3_table_move order by user_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s3_table_move +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10000 +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10001 +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10002 +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10001 +POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10002 +#### A masked pattern was here #### +u1 name1 2017-04-10 10000 +u2 name2 2017-04-10 10001 +u3 name3 2017-04-10 10002 +u4 name4 2017-04-12 10001 +u5 name5 2017-04-12 10002 +PREHOOK: query: DROP TABLE s3_table_move +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@s3_table_move +PREHOOK: Output: default@s3_table_move +POSTHOOK: query: DROP TABLE s3_table_move +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@s3_table_move +POSTHOOK: Output: default@s3_table_move +PREHOOK: query: DROP TABLE tmp_table_move +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tmp_table_move +PREHOOK: Output: default@tmp_table_move +POSTHOOK: query: DROP TABLE tmp_table_move +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tmp_table_move +POSTHOOK: 
Output: default@tmp_table_move diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 88bf829..571b248 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -1764,6 +1764,7 @@ private static ConditionalTask createCondTask(HiveConf conf, cndTsk.setResolver(new ConditionalResolverMergeFiles()); ConditionalResolverMergeFilesCtx mrCtx = new ConditionalResolverMergeFilesCtx(listTasks, condInputPath.toString()); + mrCtx.setMoveOnlyMoveTaskChanged(shouldMergeMovePaths); cndTsk.setResolverCtx(mrCtx); // make the conditional task as the child of the current leaf task diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java index 4266569..32bb457 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java @@ -54,6 +54,7 @@ public ConditionalResolverMergeFiles() { private String dir; private DynamicPartitionCtx dpCtx; // merge task could be after dynamic partition insert private ListBucketingCtx lbCtx; + private boolean moveOnlyMoveTaskChanged; public ConditionalResolverMergeFilesCtx() { } @@ -118,6 +119,14 @@ public ListBucketingCtx getLbCtx() { public void setLbCtx(ListBucketingCtx lbCtx) { this.lbCtx = lbCtx; } + + public boolean isMoveOnlyMoveTaskChanged() { + return moveOnlyMoveTaskChanged; + } + + public void setMoveOnlyMoveTaskChanged(boolean moveOnlyMoveTaskChanged) { + this.moveOnlyMoveTaskChanged = moveOnlyMoveTaskChanged; + } } public List> getTasks(HiveConf conf, @@ -285,6 +294,19 @@ private void generateActualTasks(HiveConf conf, List mergeAndMoveMoveTask = mrAndMvTask.getChildTasks().get(0); + mvWork = (MoveWork) mergeAndMoveMoveTask.getWork(); + } + 
LoadFileDesc lfd = mvWork.getLoadFileWork(); Path targetDir = lfd.getTargetDir();