From 754c975685ba7335571ac82ee5bff4557eeb526a Mon Sep 17 00:00:00 2001
From: chenheng
Date: Mon, 22 Aug 2016 16:45:20 +0800
Subject: [PATCH] HBASE-16464 archive folder grows bigger and bigger due to corrupt snapshot under tmp dir

---
Review notes (ignored by git am):
 - A corrupt working snapshot is now removed on its own; the shared tmp dir and
   any healthy in-progress snapshot inside it are left untouched.
 - Post-image blob ids on the "index" lines were not regenerated.

 .../hbase/master/snapshot/SnapshotFileCache.java    | 16 +++++++++++++++-
 .../hbase/snapshot/SnapshotDescriptionUtils.java    |  1 +
 .../master/snapshot/TestSnapshotHFileCleaner.java   | 22 ++++++++++++++++++++++
 .../hadoop/hbase/snapshot/SnapshotTestingUtils.java | 20 ++++++++++++++++++++
 4 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
index dfd3cb5..a7ceda3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.util.FSUtils;
 
@@ -297,10 +298,23 @@ public class SnapshotFileCache implements Stoppable {
     // only add those files to the cache, but not to the known snapshots
     Path snapshotTmpDir = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME);
     // only add those files to the cache, but not to the known snapshots
     FileStatus[] running = FSUtils.listStatus(fs, snapshotTmpDir);
     if (running != null) {
       for (FileStatus run : running) {
-        snapshotInProgress.addAll(fileInspector.filesUnderSnapshot(run.getPath()));
+        try {
+          snapshotInProgress.addAll(fileInspector.filesUnderSnapshot(run.getPath()));
+        } catch (CorruptedSnapshotException e) {
+          // HBASE-16464: a working snapshot whose files are gone can never be inspected,
+          // so every scan would fail on it again. Drop that one directory only; the rest
+          // of the tmp dir may hold snapshots that are still healthy.
+          if (!(e.getCause() instanceof FileNotFoundException)) {
+            throw e;
+          }
+          LOG.warn("delete the " + run.getPath() + " due to exception:", e.getCause());
+          if (!fs.delete(run.getPath(), true)) {
+            LOG.warn("failed to delete corrupt snapshot dir " + run.getPath());
+          }
+        }
       }
     }
     return snapshotInProgress;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java
index 2211aab..58c75be 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.snapshot;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.Collections;
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java
index 616907c..71f4f77 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java
@@ -166,4 +166,26 @@ public class TestSnapshotHFileCleaner {
       fs.delete(SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir), true);
     }
   }
+
+  /**
+   * HBASE-16464: a corrupt snapshot left under the tmp dir must be removed when the
+   * in-progress snapshots are scanned, instead of failing the scan.
+   */
+  @Test
+  public void testMissedTmpSnapshot() throws IOException {
+    SnapshotTestingUtils.SnapshotMock snapshotMock =
+        new SnapshotTestingUtils.SnapshotMock(TEST_UTIL.getConfiguration(), fs, rootDir);
+    SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2(
+        SNAPSHOT_NAME_STR, TABLE_NAME_STR);
+    builder.addRegionV2();
+    builder.missOneRegionSnapshotFile();
+
+    long period = Long.MAX_VALUE;
+    SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000,
+        "test-snapshot-file-cache-refresh", new SnapshotFiles());
+    cache.getSnapshotsInProgress();
+    // The only snapshot under the tmp dir was corrupt, so nothing may be left inside it.
+    Path tmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir);
+    assertFalse(fs.listStatus(tmpDir).length > 0);
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
index b5f6054..de0d1e2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
@@ -575,6 +575,26 @@ public final class SnapshotTestingUtils {
         corruptFile(p);
       }
 
+      /**
+       * Deletes every entry of the snapshot dir whose name ends with the snapshot-info file
+       * name, ".tabledesc" or the snapshot tmp dir name, leaving the snapshot incomplete.
+       */
+      public void missOneRegionSnapshotFile() throws IOException {
+        FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir);
+        if (manifestFiles == null) {
+          // FSUtils.listStatus may hand back null (callers elsewhere guard for it too).
+          return;
+        }
+        for (FileStatus fileStatus : manifestFiles) {
+          String fileName = fileStatus.getPath().getName();
+          if (fileName.endsWith(SnapshotDescriptionUtils.SNAPSHOTINFO_FILE)
+              || fileName.endsWith(".tabledesc")
+              || fileName.endsWith(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) {
+            fs.delete(fileStatus.getPath(), true);
+          }
+        }
+      }
+
       /**
        * Corrupt data-manifest file
        *
-- 
1.9.3 (Apple Git-50)