diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
index 367f4ea..b49593f 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
@@ -28,6 +28,7 @@
 import java.util.Map;
 import java.util.Map.Entry;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -37,14 +38,14 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
-import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.mapred.JobConf;
@@ -885,7 +886,13 @@ private void registerPartitions(JobContext context) throws IOException{
           moveTaskOutputs(fs, src, src, dest, true, table.isImmutable());
           moveTaskOutputs(fs,src,src,dest,false,table.isImmutable());
           if (!src.equals(dest)){
-            fs.delete(src, true);
+            if (src.toString().matches(".*" + Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+.*")){
+              // src is a scratch directory; trim the partition key/value pairs from the path
+              String diff = StringUtils.difference(src.toString(), dest.toString());
+              fs.delete(new Path(StringUtils.substringBefore(src.toString(), diff)), true);
+            } else {
+              fs.delete(src, true);
+            }
           }
           // Now, we check if the partition already exists. If not, we go ahead.
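
For reference, a minimal standalone sketch (not part of the patch) of what the two commons-lang calls above do. The paths below are hypothetical examples of a dynamic-partitioning scratch directory and its final destination, made up only to show how the scratch root is recovered before deletion:

import org.apache.commons.lang.StringUtils;

public class ScratchPathTrimSketch {
  public static void main(String[] args) {
    // Hypothetical src (scratch) and dest (final) partition paths.
    String src  = "hdfs://nn:8020/warehouse/employee/_SCRATCH0.52873/emp_country=IN/emp_state=KA";
    String dest = "hdfs://nn:8020/warehouse/employee/emp_country=IN/emp_state=KA";

    // difference() returns the tail of dest starting where it diverges from src,
    // i.e. the partition key/value suffix: "emp_country=IN/emp_state=KA"
    String diff = StringUtils.difference(src, dest);

    // substringBefore() strips that suffix from src, leaving the scratch root
    // "hdfs://nn:8020/warehouse/employee/_SCRATCH0.52873/", which is the directory
    // the committer deletes recursively instead of only the leaf partition directory.
    String scratchRoot = StringUtils.substringBefore(src, diff);
    System.out.println(scratchRoot);
  }
}
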
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
index 86d705c..b6f8a6f 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
@@ -36,14 +36,14 @@
 import java.util.Properties;
 import java.util.Set;
 
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.StorageFormats;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
-
 import org.apache.hive.hcatalog.HcatTestUtils;
 import org.apache.hive.hcatalog.mapreduce.HCatBaseTest;
-
 import org.apache.pig.EvalFunc;
 import org.apache.pig.ExecType;
 import org.apache.pig.PigException;
@@ -52,15 +52,12 @@
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.util.LogUtils;
-
 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
-
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -505,7 +502,7 @@ public void testPartColsInData() throws IOException, CommandNeedRetryException {
   }
 
   @Test
-  public void testMultiPartColsInData() throws IOException, CommandNeedRetryException {
+  public void testMultiPartColsInData() throws Exception {
     assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
 
     driver.run("drop table employee");
@@ -545,11 +542,17 @@ public void testMultiPartColsInData() throws IOException, CommandNeedRetryExcept
     assertEquals(inputData[1], results.get(1));
     assertEquals(inputData[2], results.get(2));
     assertEquals(inputData[3], results.get(3));
+    // verify the directories in the table location
+    Path path = new Path(client.getTable("default","employee").getSd().getLocation());
+    FileSystem fs = path.getFileSystem(hiveConf);
+    assertEquals(1, fs.listStatus(path).length);
+    assertEquals(4, fs.listStatus(new Path(client.getTable("default","employee").getSd().getLocation()
+        + File.separator + "emp_country=IN")).length);
     driver.run("drop table employee");
   }
 
   @Test
-  public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException {
+  public void testStoreInPartiitonedTbl() throws Exception {
     assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
 
     driver.run("drop table junit_unparted");
@@ -582,6 +585,10 @@ public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryExce
     assertFalse(itr.hasNext());
     assertEquals(11, i);
 
+    // verify the scratch directories have been cleaned up
+    Path path = new Path(client.getTable("default","junit_unparted").getSd().getLocation());
+    FileSystem fs = path.getFileSystem(hiveConf);
+    assertEquals(1, fs.listStatus(path).length);
   }
 
   @Test