commit e050f6933d6f9ba6c363bb045759824597854896 Author: Ivan Suller Date: Fri Apr 12 11:02:33 2019 +0200 HIVE-21714 diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java index 7180564812..7f9de00487 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java @@ -1363,7 +1363,7 @@ public void sqlInsertPartition() throws Exception { // Test a limit higher than available events testEventCounts(defaultDbName, firstEventId, null, 100, 13); // Test toEventId lower than current eventId - testEventCounts(defaultDbName, firstEventId, (long) firstEventId + 5, null, 5); + testEventCounts(defaultDbName, firstEventId, firstEventId + 5, null, 5); // Event 10, 11, 12 driver.run("insert into table " + tblName + " partition (ds = 'yesterday') values (2)"); @@ -1448,7 +1448,8 @@ public void sqlInsertPartition() throws Exception { insertMsg = md.getInsertMessage(event.getMessage()); assertTrue(insertMsg.isReplace()); // replace-overwrite introduces no new files - assertTrue(event.getMessage().matches(".*\"files\":\\[\\].*")); + // the insert overwrite creates an empty file with the current change + //assertTrue(event.getMessage().matches(".*\"files\":\\[\\].*")); event = rsp.getEvents().get(29); assertEquals(firstEventId + 30, event.getEventId()); @@ -1464,11 +1465,11 @@ public void sqlInsertPartition() throws Exception { // Test a limit within the available events testEventCounts(defaultDbName, firstEventId, null, 10, 10); // Test toEventId greater than current eventId - testEventCounts(defaultDbName, firstEventId, (long) firstEventId + 100, null, 31); + testEventCounts(defaultDbName, firstEventId, firstEventId + 100, null, 31); // Test toEventId greater than current eventId with some limit within available events - testEventCounts(defaultDbName, firstEventId, (long) firstEventId + 100, 10, 10); + testEventCounts(defaultDbName, firstEventId, firstEventId + 100, 10, 10); // Test toEventId greater than current eventId with some limit beyond available events - testEventCounts(defaultDbName, firstEventId, (long) firstEventId + 100, 50, 31); + testEventCounts(defaultDbName, firstEventId, firstEventId + 100, 50, 31); } private void verifyInsert(NotificationEvent event, String dbName, String tblName) throws Exception { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index cd13397a79..9ad4e71482 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -98,7 +98,7 @@ **/ @SuppressWarnings("deprecation") public class FileSinkOperator extends TerminalOperator implements - Serializable { + Serializable, IConfigureJobConf { public static final Logger LOG = LoggerFactory.getLogger(FileSinkOperator.class); @@ -1260,13 +1260,12 @@ public void closeOp(boolean abort) throws HiveException { } if (!bDynParts && !filesCreated) { - boolean skipFiles = "tez".equalsIgnoreCase( + boolean isTez = "tez".equalsIgnoreCase( HiveConf.getVar(hconf, ConfVars.HIVE_EXECUTION_ENGINE)); - if (skipFiles) { - Class clazz = conf.getTableInfo().getOutputFileFormatClass(); - skipFiles = !StreamingOutputFormat.class.isAssignableFrom(clazz); - } - if (!skipFiles) { + Class clazz = conf.getTableInfo().getOutputFileFormatClass(); + boolean isStreaming = StreamingOutputFormat.class.isAssignableFrom(clazz); + + if (!isTez || isStreaming || this.isInsertOverwrite) { createBucketFiles(fsp); } } @@ -1607,4 +1606,10 @@ private boolean isNativeTable() { return !conf.getTableInfo().isNonNative(); } + @Override + public void configureJobConf(JobConf job) { + if (conf.getInsertOverwrite()) { + job.setBoolean(Utilities.ENSURE_OPERATORS_EXECUTED, true); + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 55ae535df5..287f979d1e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -6069,3 +6069,4 @@ public StorageHandlerInfo getStorageHandlerInfo(Table table) } } } + diff --git a/ql/src/test/queries/clientpositive/insert_overwrite.q b/ql/src/test/queries/clientpositive/insert_overwrite.q index 12dd1b57a2..6e4ead3370 100644 --- a/ql/src/test/queries/clientpositive/insert_overwrite.q +++ b/ql/src/test/queries/clientpositive/insert_overwrite.q @@ -3,9 +3,15 @@ set hive.stats.column.autogather=false; set hive.stats.autogather=false; set hive.compute.query.using.stats=false; +set hive.create.as.insert.only=true; +set hive.default.fileformat.managed=ORC; +set hive.strict.managed.tables=true; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + CREATE EXTERNAL TABLE ext_non_part (col string); INSERT INTO ext_non_part VALUES ('first'), ('second'); -CREATE TABLE b LIKE ext_non_part; +CREATE TABLE b (col string); INSERT OVERWRITE TABLE ext_non_part SELECT * FROM b; @@ -23,6 +29,22 @@ INSERT OVERWRITE TABLE int_non_part SELECT * FROM b; SELECT count(*) FROM int_non_part; drop table int_non_part; + +CREATE TABLE int_buck (col string) +CLUSTERED BY (col) INTO 4 BUCKETS; + +INSERT INTO int_buck VALUES ('first'), ('second'), ('third'), ('fourth'); + +-- should be 4 +SELECT count(*) FROM int_buck; + +INSERT OVERWRITE TABLE int_buck SELECT col FROM b; + +-- should be 0 +SELECT count(*) FROM int_buck; +SELECT * FROM int_buck; + +drop table int_buck; drop table b; diff --git a/ql/src/test/results/clientpositive/llap/insert_overwrite.q.out b/ql/src/test/results/clientpositive/llap/insert_overwrite.q.out index 68f7cc895d..af6cf590fa 100644 --- a/ql/src/test/results/clientpositive/llap/insert_overwrite.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_overwrite.q.out @@ -15,11 +15,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@ext_non_part POSTHOOK: Lineage: ext_non_part.col SCRIPT [] -PREHOOK: query: CREATE TABLE b LIKE ext_non_part +PREHOOK: query: CREATE TABLE b (col string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@b -POSTHOOK: query: CREATE TABLE b LIKE ext_non_part +POSTHOOK: query: CREATE TABLE b (col string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@b @@ -92,6 +92,68 @@ POSTHOOK: query: drop table int_non_part POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@int_non_part POSTHOOK: Output: default@int_non_part +PREHOOK: query: CREATE TABLE int_buck (col string) +CLUSTERED BY (col) INTO 4 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@int_buck +POSTHOOK: query: CREATE TABLE int_buck (col string) +CLUSTERED BY (col) INTO 4 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@int_buck +PREHOOK: query: INSERT INTO int_buck VALUES ('first'), ('second'), ('third'), ('fourth') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@int_buck +POSTHOOK: query: INSERT INTO int_buck VALUES ('first'), ('second'), ('third'), ('fourth') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@int_buck +POSTHOOK: Lineage: int_buck.col SCRIPT [] +PREHOOK: query: SELECT count(*) FROM int_buck +PREHOOK: type: QUERY +PREHOOK: Input: default@int_buck +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM int_buck +POSTHOOK: type: QUERY +POSTHOOK: Input: default@int_buck +#### A masked pattern was here #### +4 +PREHOOK: query: INSERT OVERWRITE TABLE int_buck SELECT col FROM b +PREHOOK: type: QUERY +PREHOOK: Input: default@b +PREHOOK: Output: default@int_buck +POSTHOOK: query: INSERT OVERWRITE TABLE int_buck SELECT col FROM b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@b +POSTHOOK: Output: default@int_buck +POSTHOOK: Lineage: int_buck.col SIMPLE [(b)b.FieldSchema(name:col, type:string, comment:null), ] +PREHOOK: query: SELECT count(*) FROM int_buck +PREHOOK: type: QUERY +PREHOOK: Input: default@int_buck +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM int_buck +POSTHOOK: type: QUERY +POSTHOOK: Input: default@int_buck +#### A masked pattern was here #### +0 +PREHOOK: query: SELECT * FROM int_buck +PREHOOK: type: QUERY +PREHOOK: Input: default@int_buck +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM int_buck +POSTHOOK: type: QUERY +POSTHOOK: Input: default@int_buck +#### A masked pattern was here #### +PREHOOK: query: drop table int_buck +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@int_buck +PREHOOK: Output: default@int_buck +POSTHOOK: query: drop table int_buck +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@int_buck +POSTHOOK: Output: default@int_buck PREHOOK: query: drop table b PREHOOK: type: DROPTABLE PREHOOK: Input: default@b diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index e3bebbc6d5..c9c1e8d511 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -732,7 +732,7 @@ STAGE PLANS: columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) #### A masked pattern was here #### name default.l3_monthly_dw_dimplan - numFiles 1 + numFiles 64 numRows 180340 rawDataSize 269826156 serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} @@ -755,7 +755,7 @@ STAGE PLANS: columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) #### A masked pattern was here #### name default.l3_monthly_dw_dimplan - numFiles 1 + numFiles 64 numRows 180340 rawDataSize 269826156 serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} @@ -1241,7 +1241,7 @@ STAGE PLANS: columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) #### A masked pattern was here #### name default.l3_monthly_dw_dimplan - numFiles 1 + numFiles 64 numRows 180340 rawDataSize 269826156 serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} @@ -1264,7 +1264,7 @@ STAGE PLANS: columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) #### A masked pattern was here #### name default.l3_monthly_dw_dimplan - numFiles 1 + numFiles 64 numRows 180340 rawDataSize 269826156 serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by}