Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1198626)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -482,6 +482,7 @@
     // The class responsible for logging client side performance metrics
     // Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger
     HIVE_PERF_LOGGER("hive.exec.perf.logger", "org.apache.hadoop.hive.ql.log.PerfLogger"),
+    HIVE_LOAD_FILE_EXISTS_RENAME("hive.load.file.exists.rename", false),
     // Whether to delete the scratchdir while startup
     HIVE_START_CLEANUP_SCRATCHDIR("hive.start.cleanup.scratchdir", false),
     HIVE_INSERT_INTO_MULTILEVEL_DIRS("hive.insert.into.multilevel.dirs", false),
Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml	(revision 1198626)
+++ conf/hive-default.xml	(working copy)
@@ -1212,4 +1212,13 @@
   "insert directory '/HIVEFT25686/chinna/' from table"</description>
 </property>
 
+<property>
+  <name>hive.load.file.exists.rename</name>
+  <value>false</value>
+  <description>Whether to load a file when a file with the same name already exists in the target directory.
+  False means an exception is thrown if a file with the same name already exists.
+  True means the incoming file is renamed (with a _copy_N suffix) and loaded.
+  </description>
+</property>
+
 </configuration>
Index: data/conf/hive-site.xml
===================================================================
--- data/conf/hive-site.xml	(revision 1198626)
+++ data/conf/hive-site.xml	(working copy)
@@ -176,4 +176,13 @@
   The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat.</description>
 </property>
 
+<property>
+  <name>hive.load.file.exists.rename</name>
+  <value>true</value>
+  <description>Whether to load a file when a file with the same name already exists in the target directory.
+  False means an exception is thrown if a file with the same name already exists.
+  True means the incoming file is renamed (with a _copy_N suffix) and loaded.
+  </description>
+</property>
+
 </configuration>
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java	(revision 1198626)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java	(working copy)
@@ -1170,7 +1170,7 @@
         Hive.replaceFiles(loadPath, newPartPath, oldPartPath, getConf());
       } else {
         FileSystem fs = FileSystem.get(tbl.getDataLocation(), getConf());
-        Hive.copyFiles(loadPath, newPartPath, fs);
+        Hive.copyFiles(loadPath, newPartPath, fs, getConf());
       }
 
       // recreate the partition if it existed before
@@ -1870,9 +1870,11 @@
     }
   }
 
-  static private void checkPaths(FileSystem fs, FileStatus[] srcs, Path destf,
-      boolean replace) throws HiveException {
+  static private Map<Path, Path> checkPaths(FileSystem fs, FileStatus[] srcs, Path destf,
+      boolean replace, Configuration conf) throws HiveException {
+    HashMap<Path, Path> renameFiles = new HashMap<Path, Path>();
     try {
+      Path proposedStaging = null;
       for (FileStatus src : srcs) {
         FileStatus[] items = fs.listStatus(src.getPath());
         for (FileStatus item : items) {
@@ -1917,9 +1919,14 @@
             Path itemDest = new Path(destf, itemStaging.getName());
             Path itemStagingBase = new Path(itemStaging.getParent(), name);
 
-            while (fs.exists(itemDest)) {
-              Path proposedStaging = itemStagingBase
-                  .suffix("_copy_" + counter++).suffix(filetype);
+            boolean exists = fs.exists(itemDest);
+
+            if (exists
+                && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOAD_FILE_EXISTS_RENAME)) {
+              throw new HiveException(itemDest.getName() + " file already exists");
+            }
+            while (exists) {
+              proposedStaging = itemStagingBase.suffix("_copy_" + counter++).suffix(filetype);
               Path proposedDest = new Path(destf, proposedStaging.getName());
 
               if (fs.exists(proposedDest)) {
@@ -1929,23 +1936,29 @@
                 continue;
               }
 
-              if (!fs.rename(itemStaging, proposedStaging)) {
-                LOG.debug("Unsuccessfully in attempt to rename " + itemStaging + " to " + proposedStaging + "...");
-                continue;
-              }
-
-              LOG.debug("Successfully renamed " + itemStaging + " to " + proposedStaging);
-              itemDest = proposedDest;
+              renameFiles.put(itemStaging, proposedStaging);
+              exists = false;
             }
           }
         }
       }
+      for (Entry<Path, Path> pathSet : renameFiles.entrySet()) {
+        proposedStaging = pathSet.getValue();
+        Path itemStaging = pathSet.getKey();
+        if (!fs.rename(itemStaging, proposedStaging)) {
+          LOG.debug("Unsuccessfully in attempt to rename " + itemStaging + " to "
+              + proposedStaging + "...");
+          continue;
+        }
+        LOG.debug("Successfully renamed " + itemStaging + " to " + proposedStaging);
+      }
     } catch (IOException e) {
       throw new HiveException("checkPaths: filesystem error in check phase", e);
     }
+    return renameFiles;
   }
 
-  static protected void copyFiles(Path srcf, Path destf, FileSystem fs)
+  static protected void copyFiles(Path srcf, Path destf, FileSystem fs, HiveConf conf)
       throws HiveException {
     try {
       // create the destination if it does not exist
@@ -1971,22 +1984,51 @@
       // srcs = new FileStatus[0]; Why is this needed?
     }
 
     // check that source and target paths exist
-    checkPaths(fs, srcs, destf, false);
+    Map<Path, Path> renamedFiles = checkPaths(fs, srcs, destf, false, conf);
 
     // move it, move it
+    Path renamedPath = null;
+    boolean renameDone = true;
+    Path sourcePath = null;
     try {
       for (FileStatus src : srcs) {
         FileStatus[] items = fs.listStatus(src.getPath());
-        for (FileStatus item : items) {
-          Path source = item.getPath();
-          Path target = new Path(destf, item.getPath().getName());
-          if (!fs.rename(source, target)) {
-            throw new IOException("Cannot move " + source + " to " + target);
+        // The else branch handles the case where a file such as "/tmp/1.txt" is being loaded
+        // and a file with that name already exists in the table directory: checkPaths() has
+        // renamed the incoming file to "/tmp/1_copy_1.txt", so fs.listStatus(src.getPath())
+        // returns no items.
+        if (null != items && items.length != 0) {
+          for (FileStatus item : items) {
+            Path source = item.getPath();
+            Path target = new Path(destf, item.getPath().getName());
+            if (!fs.rename(source, target)) {
+              throw new IOException("Cannot move " + source + " to " + target);
+            }
           }
+        } else {
+          sourcePath = src.getPath();
+          renamedPath = renamedFiles.get(sourcePath);
+          if (null != renamedPath) {
+            Path target = new Path(destf, renamedPath.getName());
+            renameDone = fs.rename(renamedPath, target);
+            if (!renameDone) {
+              throw new IOException("Cannot move " + renamedPath + " to " + target);
+            }
+          }
         }
       }
     } catch (IOException e) {
       throw new HiveException("copyFiles: error while moving files!!!", e);
+    } finally {
+      try {
+        if (!renameDone
+            && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOAD_FILE_EXISTS_RENAME)) {
+          fs.rename(renamedPath, sourcePath);
+        }
+      } catch (IOException e) {
+        LOG.info("Failed to rename " + renamedPath.getName() + " back to "
+            + sourcePath.getName());
+      }
     }
   }
 
@@ -2022,7 +2064,7 @@
       LOG.info("No sources specified to move: " + srcf);
       return;
     }
-    checkPaths(fs, srcs, destf, true);
+    checkPaths(fs, srcs, destf, true, conf);
 
     // point of no return -- delete oldPath
     if (oldPath != null) {
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java	(revision 1198626)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java	(working copy)
@@ -544,7 +544,7 @@
     FileSystem fs;
     try {
       fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
-      Hive.copyFiles(srcf, new Path(getDataLocation().getPath()), fs);
+      Hive.copyFiles(srcf, new Path(getDataLocation().getPath()), fs, Hive.get().getConf());
     } catch (IOException e) {
       throw new HiveException("addFiles: filesystem error in check phase", e);
     }
Index: ql/src/test/queries/clientpositive/input47.q
===================================================================
--- ql/src/test/queries/clientpositive/input47.q	(revision 0)
+++ ql/src/test/queries/clientpositive/input47.q	(revision 0)
@@ -0,0 +1,12 @@
+create table load_overwrite (key string, value string) stored as textfile location 'file:/tmp/load_overwrite';
+create table load_overwrite2 (key string, value string) stored as textfile location 'file:/tmp1/load2_overwrite2';
+create table load_overwrite3 (key string, value string) stored as textfile location 'file:/tmp/load2_overwrite3';
+
+load data local inpath '../data/files/kv1.txt' into table load_overwrite;
+load data local inpath '../data/files/kv1.txt' into table load_overwrite2;
+
+load data inpath '/tmp/load_overwrite/kv1.txt' into table load_overwrite3;
+select count(1) from load_overwrite3;
+
+load data inpath '/tmp1/load2_overwrite2/kv1.txt' into table load_overwrite3;
+select count(1) from load_overwrite3;
Index: ql/src/test/results/clientpositive/input47.q.out
===================================================================
--- ql/src/test/results/clientpositive/input47.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/input47.q.out	(revision 0)
@@ -0,0 +1,57 @@
+PREHOOK: query: create table load_overwrite (key string, value string) stored as textfile location 'file:/tmp/load_overwrite'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table load_overwrite (key string, value string) stored as textfile location 'file:/tmp/load_overwrite'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@load_overwrite
+PREHOOK: query: create table load_overwrite2 (key string, value string) stored as textfile location 'file:/tmp1/load2_overwrite2'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table load_overwrite2 (key string, value string) stored as textfile location 'file:/tmp1/load2_overwrite2'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@load_overwrite2
+PREHOOK: query: create table load_overwrite3 (key string, value string) stored as textfile location 'file:/tmp/load2_overwrite3'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table load_overwrite3 (key string, value string) stored as textfile location 'file:/tmp/load2_overwrite3'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@load_overwrite3
+PREHOOK: query: load data local inpath '../data/files/kv1.txt' into table load_overwrite
+PREHOOK: type: LOAD
+PREHOOK: Output: default@load_overwrite
+POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table load_overwrite
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@load_overwrite
+PREHOOK: query: load data local inpath '../data/files/kv1.txt' into table load_overwrite2
+PREHOOK: type: LOAD
+PREHOOK: Output: default@load_overwrite2
+POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table load_overwrite2
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@load_overwrite2
+PREHOOK: query: load data inpath '/tmp/load_overwrite/kv1.txt' into table load_overwrite3
+PREHOOK: type: LOAD
+PREHOOK: Output: default@load_overwrite3
+POSTHOOK: query: load data inpath '/tmp/load_overwrite/kv1.txt' into table load_overwrite3
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@load_overwrite3
+PREHOOK: query: select count(1) from load_overwrite3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load_overwrite3
+PREHOOK: Output: file:/tmp/root/hive_2011-11-08_01-05-54_673_6246422200043940190/-mr-10000
+POSTHOOK: query: select count(1) from load_overwrite3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load_overwrite3
+POSTHOOK: Output: file:/tmp/root/hive_2011-11-08_01-05-54_673_6246422200043940190/-mr-10000
+500
+PREHOOK: query: load data inpath '/tmp1/load2_overwrite2/kv1.txt' into table load_overwrite3
+PREHOOK: type: LOAD
+PREHOOK: Output: default@load_overwrite3
+POSTHOOK: query: load data inpath '/tmp1/load2_overwrite2/kv1.txt' into table load_overwrite3
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@load_overwrite3
+PREHOOK: query: select count(1) from load_overwrite3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load_overwrite3
+PREHOOK: Output: file:/tmp/root/hive_2011-11-08_01-06-02_389_4001132346103404410/-mr-10000
+POSTHOOK: query: select count(1) from load_overwrite3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load_overwrite3
+POSTHOOK: Output: file:/tmp/root/hive_2011-11-08_01-06-02_389_4001132346103404410/-mr-10000
+1000
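
Usage note (illustrative, not part of the patch): a minimal sketch of how the new flag is expected to behave from the Hive CLI, based on the checkPaths()/copyFiles() changes above. The table name t1 is hypothetical; '../data/files/kv1.txt' is the sample file the new input47.q test already uses.

    create table t1 (key string, value string) stored as textfile;
    load data local inpath '../data/files/kv1.txt' into table t1;

    -- With the default hive.load.file.exists.rename=false, loading a file whose name
    -- already exists in the table directory is expected to fail with
    -- "kv1.txt file already exists".
    load data local inpath '../data/files/kv1.txt' into table t1;

    -- With the flag enabled, the incoming file is renamed with a _copy_N suffix and loaded.
    set hive.load.file.exists.rename=true;
    load data local inpath '../data/files/kv1.txt' into table t1;
    select count(1) from t1;    -- expect twice the row count of kv1.txt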