commit 40767da4dfee5e3ba2e65dd28f15e3cddee7216d
Author: Ivan Suller
Date:   Tue May 28 11:25:52 2019 +0200

    HIVE-21784

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 6472e8058a..2e4ef315c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -573,10 +573,18 @@ public void close(boolean abort) throws IOException {
         writer.close(); // normal close, when there are inserts.
       }
     } else {
-      if (LOG.isDebugEnabled()) {
+      if (options.isWritingBase()) {
+        // With insert overwrite we need the empty file to delete the previous content of the table
+        LOG.debug("Empty file has been created for overwrite: {}", path);
+
+        OrcFile.WriterOptions wo = OrcFile.writerOptions(this.options.getConfiguration())
+            .inspector(rowInspector)
+            .callback(new OrcRecordUpdater.KeyIndexBuilder("testEmpty"));
+        OrcFile.createWriter(path, wo).close();
+      } else {
         LOG.debug("No insert events in path: {}.. Deleting..", path);
+        fs.delete(path, false);
       }
-      fs.delete(path, false);
     }
   } else {
     //so that we create empty bucket files when needed (but see HIVE-17138)
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
index cbed1fc5f8..8451462023 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
@@ -538,7 +538,7 @@ public void testGetLogicalLength() throws Exception {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.getLocal(conf);
     OrcOutputFormat of = new OrcOutputFormat();
-    Path root = new Path(tmpDir, "testEmpty").makeQualified(fs);
+    Path root = new Path(tmpDir, "testLogicalEmpty").makeQualified(fs);
     fs.delete(root, true);
     ObjectInspector inspector;
     synchronized (TestOrcFile.class) {
@@ -592,16 +592,6 @@ public void testEmpty() throws Exception {
         .inspector(inspector).bucket(BUCKET).writingBase(true)
         .maximumWriteId(100).finalDestination(root);
     of.getRecordUpdater(root, options).close(false);
-    {
-      /*OrcRecordUpdater is inconsistent about when it creates empty files and when it does not.
-      This creates an empty bucket. HIVE-17138*/
-      OrcFile.WriterOptions wo = OrcFile.writerOptions(conf);
-      wo.inspector(inspector);
-      wo.callback(new OrcRecordUpdater.KeyIndexBuilder("testEmpty"));
-      Writer w = OrcFile.createWriter(AcidUtils.createBucketFile(new Path(root,
-          AcidUtils.baseDir(100)), BUCKET), wo);
-      w.close();
-    }
     conf.set(ValidTxnList.VALID_TXNS_KEY,
         new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
     ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testEmpty:200:" + Long.MAX_VALUE);
diff --git a/ql/src/test/queries/clientpositive/insert_overwrite.q b/ql/src/test/queries/clientpositive/insert_overwrite.q
index 6e4ead3370..bc6821735c 100644
--- a/ql/src/test/queries/clientpositive/insert_overwrite.q
+++ b/ql/src/test/queries/clientpositive/insert_overwrite.q
@@ -20,6 +20,17 @@ SELECT count(*) FROM ext_non_part;
 
 drop table ext_non_part;
 
+CREATE TABLE int_non_part (col string) STORED AS ORC TBLPROPERTIES ('transactional'='true');
+INSERT INTO int_non_part VALUES ('first'), ('second');
+
+INSERT OVERWRITE TABLE int_non_part SELECT * FROM b;
+
+-- should be 0
+SELECT count(*) FROM int_non_part;
+
+drop table int_non_part;
+
+
 
 CREATE TABLE int_non_part (col string);
 INSERT INTO int_non_part VALUES ('first'), ('second');
diff --git a/ql/src/test/results/clientpositive/llap/insert_overwrite.q.out b/ql/src/test/results/clientpositive/llap/insert_overwrite.q.out
index 22146cbebe..fbc3326b39 100644
--- a/ql/src/test/results/clientpositive/llap/insert_overwrite.q.out
+++ b/ql/src/test/results/clientpositive/llap/insert_overwrite.q.out
@@ -49,6 +49,49 @@ POSTHOOK: query: drop table ext_non_part
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@ext_non_part
 POSTHOOK: Output: default@ext_non_part
+PREHOOK: query: CREATE TABLE int_non_part (col string) STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@int_non_part
+POSTHOOK: query: CREATE TABLE int_non_part (col string) STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@int_non_part
+PREHOOK: query: INSERT INTO int_non_part VALUES ('first'), ('second')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@int_non_part
+POSTHOOK: query: INSERT INTO int_non_part VALUES ('first'), ('second')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@int_non_part
+POSTHOOK: Lineage: int_non_part.col SCRIPT []
+PREHOOK: query: INSERT OVERWRITE TABLE int_non_part SELECT * FROM b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@b
+PREHOOK: Output: default@int_non_part
+POSTHOOK: query: INSERT OVERWRITE TABLE int_non_part SELECT * FROM b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@b
+POSTHOOK: Output: default@int_non_part
+POSTHOOK: Lineage: int_non_part.col SIMPLE [(b)b.FieldSchema(name:col, type:string, comment:null), ]
+PREHOOK: query: SELECT count(*) FROM int_non_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@int_non_part
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*) FROM int_non_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@int_non_part
+#### A masked pattern was here ####
+0
+PREHOOK: query: drop table int_non_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@int_non_part
+PREHOOK: Output: default@int_non_part
+POSTHOOK: query: drop table int_non_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@int_non_part
+POSTHOOK: Output: default@int_non_part
 PREHOOK: query: CREATE TABLE int_non_part (col string)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
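
Note (not part of the commit): the close() change above boils down to "if the updater wrote no insert events, materialize an empty base file when this is an insert-overwrite base, otherwise delete the unused bucket path", so the new empty base masks the table's previous contents. Below is a minimal standalone sketch of that decision under stated assumptions: the class name EmptyBaseSketch, the Row shape, the helper name finishEmptyBucket and the example path are made up for illustration, and the sketch omits the OrcRecordUpdater.KeyIndexBuilder callback that the real patch attaches; only the OrcFile, ObjectInspectorFactory and FileSystem calls mirror what the patch and its test already use.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

public class EmptyBaseSketch {

  /** Trivial row shape just to obtain an ObjectInspector; hypothetical. */
  public static class Row {
    public String col;
  }

  /**
   * Mirrors the decision in the patched OrcRecordUpdater.close() when no insert
   * events were written: create an empty ORC base file for an insert overwrite
   * (so the new base hides the old data), otherwise delete the unneeded path.
   */
  static void finishEmptyBucket(Configuration conf, FileSystem fs, Path path,
      boolean writingBase) throws IOException {
    if (writingBase) {
      ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
          Row.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
      OrcFile.WriterOptions wo = OrcFile.writerOptions(conf).inspector(inspector);
      OrcFile.createWriter(path, wo).close();   // zero-row file: schema and footer only
    } else {
      fs.delete(path, false);                   // no inserts, no overwrite: drop the file
    }
  }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    // Hypothetical bucket path; the real updater derives it via AcidUtils.
    finishEmptyBucket(conf, fs, new Path("/tmp/acid_sketch/base_100/bucket_00000"), true);
  }
}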