diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java index 86a84a4..aba8b6a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java @@ -201,6 +201,14 @@ public class LoadIncrementalHFiles extends Configured implements Tool { } Path familyDir = familyStat.getPath(); byte[] familyName = familyDir.getName().getBytes(); + // Skip non-valid family + try { + HColumnDescriptor.isLegalFamilyName(familyName); + } + catch (IllegalArgumentException e) { + LOG.warn("Skipping invalid family " + familyStat.getPath()); + continue; + } TFamily family = visitor.bulkFamily(familyName); FileStatus[] hfileStatuses = fs.listStatus(familyDir); @@ -632,9 +640,9 @@ public class LoadIncrementalHFiles extends Configured implements Tool { byte[] splitKey) throws IOException { final Path hfilePath = item.hfilePath; - // We use a '_' prefix which is ignored when walking directory trees - // above. - final String TMP_DIR = "_tmp"; + // We use a '.' prefix which is ignored when walking directory trees + // above. It is a non-valid family name. + final String TMP_DIR = ".tmp"; Path tmpDir = item.hfilePath.getParent(); if (!tmpDir.getName().equals(TMP_DIR)) { tmpDir = new Path(tmpDir, TMP_DIR); @@ -661,6 +669,17 @@ public class LoadIncrementalHFiles extends Configured implements Tool { lqis.add(new LoadQueueItem(item.family, botOut)); lqis.add(new LoadQueueItem(item.family, topOut)); + // If the current item is already the result of previous splits, + // we don't need it anymore. Clean up to save space. + // It is not part of the original input files. + try { + tmpDir = item.hfilePath.getParent(); + if (tmpDir.getName().equals(TMP_DIR)) { + fs.delete(item.hfilePath, false); + } + } catch (IOException e) { + LOG.warn("Unable to delete temporary split file " + item.hfilePath); + } LOG.info("Successfully split into new HFiles " + botOut + " and " + topOut); return lqis; }