From c5531197c8605a81d7a82964f024ead392f55449 Mon Sep 17 00:00:00 2001 From: Zach York Date: Wed, 2 May 2018 11:16:02 -0700 Subject: [PATCH] HBASE-21070 Fix SnapshotFileCache for HBase backed by S3 SnapshotFileCache depends on getting the last modified time of the snapshot directory, however, S3 FileSystem's do not update the last modified time of the top 'folder' when objects are added/removed. --- .../hbase/master/snapshot/SnapshotFileCache.java | 80 ++++++++++++---------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java index 358b4ea..96e53d0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java @@ -91,12 +91,14 @@ public class SnapshotFileCache implements Stoppable { private final FileSystem fs; private final SnapshotFileInspector fileInspector; private final Path snapshotDir; - private final Set cache = new HashSet<>(); + private volatile Set cache = new HashSet(); + private final ReentrantLock readLock = new ReentrantLock(); /** * This is a helper map of information about the snapshot directories so we don't need to rescan * them if they haven't changed since the last time we looked. */ - private final Map snapshots = new HashMap<>(); + private volatile Map snapshots = + new HashMap(); private final Timer refreshTimer; private long lastModifiedTime = Long.MIN_VALUE; @@ -206,49 +208,55 @@ public class SnapshotFileCache implements Stoppable { private synchronized void refreshCache() throws IOException { // get the status of the snapshots directory and check if it is has changes - FileStatus dirStatus; - try { - dirStatus = fs.getFileStatus(snapshotDir); - } catch (FileNotFoundException e) { - if (this.cache.size() > 0) { - LOG.error("Snapshot directory: " + snapshotDir + " doesn't exist"); + // We need to check the internal folder as Object Stores do not always update + // the parent directory's modification time. + FileStatus[] snapshotsOnDisk = FSUtils.listStatus(fs, snapshotDir); + if (snapshotsOnDisk == null) { + // remove all the remembered snapshots because we don't have any left + if (LOG.isDebugEnabled() && this.snapshots.size() > 0) { + LOG.debug("No snapshots on-disk under: {}, cache empty", snapshotDir); } + this.snapshots.clear(); + this.cache.clear(); return; } + int containsTmpDir = 0; + long subDirLastModifiedTime = Long.MIN_VALUE; + for (FileStatus fileStatus : snapshotsOnDisk) { + if (fileStatus.getPath().getName().equals(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) { + containsTmpDir = 1; + } else if (fileStatus.getModificationTime() > subDirLastModifiedTime) { + subDirLastModifiedTime = fileStatus.getModificationTime(); + } + } + // if the snapshot directory wasn't modified since we last check, we are done - if (dirStatus.getModificationTime() <= this.lastModifiedTime) return; + if (subDirLastModifiedTime <= this.lastModifiedTime && + snapshotsOnDisk.length - containsTmpDir == this.snapshots.size()) { + LOG.debug("Snapshot directory has not been modified since the last refresh, " + + "skipping cache refresh"); + return; + } // directory was modified, so we need to reload our cache // there could be a slight race here where we miss the cache, check the directory modification // time, then someone updates the directory, causing us to not scan the directory again. // However, snapshot directories are only created once, so this isn't an issue. + LOG.debug("Snapshot Directory: {} has changed since last run, refreshing SnapshotFileCache.", + snapshotDir); - // 1. update the modified time - this.lastModifiedTime = dirStatus.getModificationTime(); + // 1. Create a temp cache + final Set tempCache = new HashSet(); + Map known = new HashMap(); - // 2.clear the cache - this.cache.clear(); - Map known = new HashMap<>(); - - // 3. check each of the snapshot directories - FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir); - if (snapshots == null) { - // remove all the remembered snapshots because we don't have any left - if (LOG.isDebugEnabled() && this.snapshots.size() > 0) { - LOG.debug("No snapshots on-disk, cache empty"); - } - this.snapshots.clear(); - return; - } - - // 3.1 iterate through the on-disk snapshots - for (FileStatus snapshot : snapshots) { + // 2. iterate through the on-disk snapshots + for (FileStatus snapshot : snapshotsOnDisk) { String name = snapshot.getPath().getName(); - // its not the tmp dir, + // its not the tmp dir if (!name.equals(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) { SnapshotDirectoryInfo files = this.snapshots.remove(name); - // 3.1.1 if we don't know about the snapshot or its been modified, we need to update the + // 2.1 if we don't know about the snapshot or its been modified, we need to update the // files the latter could occur where I create a snapshot, then delete it, and then make a // new snapshot with the same name. We will need to update the cache the information from // that new snapshot, even though it has the same name as the files referenced have @@ -258,15 +266,17 @@ public class SnapshotFileCache implements Stoppable { Collection storedFiles = fileInspector.filesUnderSnapshot(snapshot.getPath()); files = new SnapshotDirectoryInfo(snapshot.getModificationTime(), storedFiles); } - // 3.2 add all the files to cache - this.cache.addAll(files.getFiles()); + // 2.2 add all the files to cache + tempCache.addAll(files.getFiles()); known.put(name, files); } } - // 4. set the snapshots we are tracking - this.snapshots.clear(); - this.snapshots.putAll(known); + this.lastModifiedTime = subDirLastModifiedTime; + + // 3. Update the cache and snapshot objects + this.cache = tempCache; + this.snapshots = known; } @VisibleForTesting List getSnapshotsInProgress( -- 2.6.4