From 90aace239f12136c357d0d31d07ddfdcfeeae702 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Wed, 10 Feb 2016 15:51:04 -0800 Subject: [PATCH] HIVE-13040 : Handle empty bucket creations more efficiently --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java | 4 ++-- .../org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java | 13 +++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 7a62ff9..ab0635e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -1480,7 +1480,7 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I taskIDToFile = removeTempOrDuplicateFiles(items, fs); // if the table is bucketed and enforce bucketing, we should check and generate all buckets - if (dpCtx.getNumBuckets() > 0 && taskIDToFile != null) { + if (dpCtx.getNumBuckets() > 0 && taskIDToFile != null && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { // refresh the file list items = fs.listStatus(parts[i].getPath()); // get the missing buckets and generate empty buckets @@ -1500,7 +1500,7 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I FileStatus[] items = fs.listStatus(path); taskIDToFile = removeTempOrDuplicateFiles(items, fs); if(taskIDToFile != null && taskIDToFile.size() > 0 && conf != null && conf.getTable() != null - && (conf.getTable().getNumBuckets() > taskIDToFile.size())) { + && (conf.getTable().getNumBuckets() > taskIDToFile.size()) && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { // get the missing buckets and generate empty buckets for non-dynamic partition String taskID1 = taskIDToFile.keySet().iterator().next(); Path bucketPath = taskIDToFile.values().iterator().next().getPath(); diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java index 3fb6a86..355c0bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java @@ -110,15 +110,12 @@ public void close(Reporter reporter) throws IOException { @Override public void close(boolean b) throws IOException { - // if we haven't written any rows, we need to create a file with a - // generic schema. if (writer == null) { - // a row with no columns - ObjectInspector inspector = ObjectInspectorFactory. - getStandardStructObjectInspector(new ArrayList(), - new ArrayList()); - options.inspector(inspector); - writer = OrcFile.createWriter(path, options); + // no rows were written (writer was never created), so just create an empty file + FileSystem fs = options.getFileSystem() == null ? + path.getFileSystem(options.getConfiguration()) : options.getFileSystem(); + fs.createNewFile(path); + return; } writer.close(); } -- 1.7.12.4 (Apple Git-37)