From 3733b73dbad79a1efc567e43680aab7b4bd47497 Mon Sep 17 00:00:00 2001 From: chenheng Date: Wed, 24 Aug 2016 16:19:16 +0800 Subject: [PATCH] HBASE-16490 Fix race condition between SnapshotManager and SnapshotCleaner --- .../java/org/apache/hadoop/hbase/master/HMaster.java | 5 ++++- .../hbase/master/cleaner/BaseFileCleanerDelegate.java | 7 +++++++ .../hadoop/hbase/master/cleaner/CleanerChore.java | 16 ++++++++++++++-- .../hbase/master/cleaner/FileCleanerDelegate.java | 8 ++++++++ .../hadoop/hbase/master/cleaner/HFileCleaner.java | 12 ++++++++++-- .../master/snapshot/EnabledTableSnapshotHandler.java | 2 +- .../hbase/master/snapshot/SnapshotHFileCleaner.java | 13 +++++++++++++ .../hadoop/hbase/master/snapshot/SnapshotManager.java | 17 +++++++++++++++++ .../hbase/master/snapshot/TakeSnapshotHandler.java | 7 ++++++- 9 files changed, 80 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 97ad394..d5de797 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -35,6 +35,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -973,8 +974,10 @@ public class HMaster extends HRegionServer implements MasterServices { //start the hfile archive cleaner thread Path archiveDir = HFileArchiveUtil.getArchivePath(conf); + Map params = new HashMap(); + params.put(MASTER, this); this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem() - .getFileSystem(), archiveDir); + .getFileSystem(), archiveDir, params); getChoreService().scheduleChore(hfileCleaner); serviceStarted = true; if (LOG.isTraceEnabled()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java index c6955d0..891db22 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hbase.BaseConfigurable; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; +import java.util.Map; + /** * Base class for file cleaners which allows subclasses to implement a simple * isFileDeletable method (which used to be the FileCleanerDelegate contract). @@ -39,6 +41,11 @@ implements FileCleanerDelegate { }}); } + @Override + public void init(Map params) { + // subclass could override it if needed. + } + /** * Should the master delete the file or keep it? * @param fStat file status of the file to check diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java index 5a93a6d..d35f403 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master.cleaner; import java.io.IOException; import java.util.LinkedList; import java.util.List; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -29,6 +30,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.ipc.RemoteException; @@ -49,6 +52,12 @@ public abstract class CleanerChore extends Schedu private final Path oldFileDir; private final Configuration conf; protected List cleanersChain; + protected Map params; + + public CleanerChore(String name, final int sleepPeriod, final Stoppable s, Configuration conf, + FileSystem fs, Path oldFileDir, String confKey) { + this(name, sleepPeriod, s, conf, fs, oldFileDir, confKey, null); + } /** * @param name name of the chore being run @@ -58,17 +67,19 @@ public abstract class CleanerChore extends Schedu * @param fs handle to the FS * @param oldFileDir the path to the archived files * @param confKey configuration key for the classes to instantiate + * @param params members could be used in cleaner */ public CleanerChore(String name, final int sleepPeriod, final Stoppable s, Configuration conf, - FileSystem fs, Path oldFileDir, String confKey) { + FileSystem fs, Path oldFileDir, String confKey, Map params) { super(name, s, sleepPeriod); this.fs = fs; this.oldFileDir = oldFileDir; this.conf = conf; - + this.params = params; initCleanerChain(confKey); } + /** * Validate the file to see if it even belongs in the directory. If it is valid, then the file * will go through the cleaner delegates, but otherwise the file is just deleted. @@ -109,6 +120,7 @@ public abstract class CleanerChore extends Schedu @SuppressWarnings("unchecked") T cleaner = (T) c.newInstance(); cleaner.setConf(conf); + cleaner.init(this.params); return cleaner; } catch (Exception e) { LOG.warn("Can NOT create CleanerDelegate: " + className, e); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java index b11fd80..7a15b96 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java @@ -22,6 +22,8 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.hbase.Stoppable; +import java.util.Map; + /** * General interface for cleaning files from a folder (generally an archive or * backup folder). These are chained via the {@link CleanerChore} to determine @@ -36,4 +38,10 @@ public interface FileCleanerDelegate extends Configurable, Stoppable { * @return files that are ok to delete according to this cleaner */ Iterable getDeletableFiles(Iterable files); + + + /** + * this method is used to pass some instance into subclass + * */ + void init(Map params); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java index 2785155..89c316b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.master.cleaner; import java.util.List; +import java.util.Map; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -35,16 +36,23 @@ public class HFileCleaner extends CleanerChore { public static final String MASTER_HFILE_CLEANER_PLUGINS = "hbase.master.hfilecleaner.plugins"; + public HFileCleaner(final int period, final Stoppable stopper, Configuration conf, FileSystem fs, + Path directory) { + this(period, stopper, conf, fs, directory, null); + } + /** * @param period the period of time to sleep between each run * @param stopper the stopper * @param conf configuration to use * @param fs handle to the FS * @param directory directory to be cleaned + * @param params params could be used in subclass of BaseHFileCleanerDelegate */ public HFileCleaner(final int period, final Stoppable stopper, Configuration conf, FileSystem fs, - Path directory) { - super("HFileCleaner", period, stopper, conf, fs, directory, MASTER_HFILE_CLEANER_PLUGINS); + Path directory, Map params) { + super("HFileCleaner", period, stopper, conf, fs, + directory, MASTER_HFILE_CLEANER_PLUGINS, params); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java index 7e047ac..f545a82 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java @@ -50,7 +50,7 @@ public class EnabledTableSnapshotHandler extends TakeSnapshotHandler { public EnabledTableSnapshotHandler(SnapshotDescription snapshot, MasterServices master, final SnapshotManager manager) { - super(snapshot, master); + super(snapshot, master, manager); this.coordinator = manager.getCoordinator(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java index df03d63..3e207dd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master.snapshot; import java.io.IOException; import java.util.Collection; import java.util.Collections; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,6 +31,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseInterfaceAudience; +import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate; import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; @@ -57,19 +59,29 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate { /** File cache for HFiles in the completed and currently running snapshots */ private SnapshotFileCache cache; + private HMaster master; + @Override public synchronized Iterable getDeletableFiles(Iterable files) { try { + master.getSnapshotManager().acquireLock(); return cache.getUnreferencedFiles(files); } catch (CorruptedSnapshotException cse) { LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse); } catch (IOException e) { LOG.error("Exception while checking if files were valid, keeping them just in case.", e); + } finally { + master.getSnapshotManager().releaseLock(); } return Collections.emptyList(); } @Override + public void init(Map params) { + this.master = (HMaster)params.get(HMaster.MASTER); + } + + @Override protected boolean isFileDeletable(FileStatus fStat) { return false; } @@ -93,6 +105,7 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate { } } + @Override public void stop(String why) { this.cache.stop(why); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java index 0304e38..94b49d7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -160,6 +161,14 @@ public class SnapshotManager extends MasterProcedureManager implements Stoppable private Path rootDir; private ExecutorService executorService; + /** + * Lock for snapshot operations (only exclusive mode now) + * - create snapshot + * - SnapshotCleaner + * */ + private ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + public SnapshotManager() {} /** @@ -1172,4 +1181,12 @@ public class SnapshotManager extends MasterProcedureManager implements Stoppable builder.setType(SnapshotDescription.Type.FLUSH); return builder.build(); } + + public void acquireLock() { + this.lock.writeLock().lock(); + } + + public void releaseLock() { + this.lock.writeLock().unlock(); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java index 8967a70..b4e892f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java @@ -87,6 +87,7 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh protected final MonitoredTask status; protected final TableName snapshotTable; protected final SnapshotManifest snapshotManifest; + protected final SnapshotManager snapshotManager; protected HTableDescriptor htd; @@ -94,13 +95,15 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh * @param snapshot descriptor of the snapshot to take * @param masterServices master services provider */ - public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices) { + public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices, + final SnapshotManager snapshotManager) { super(masterServices, EventType.C_M_SNAPSHOT_TABLE); assert snapshot != null : "SnapshotDescription must not be nul1"; assert masterServices != null : "MasterServices must not be nul1"; this.master = masterServices; this.snapshot = snapshot; + this.snapshotManager = snapshotManager; this.snapshotTable = TableName.valueOf(snapshot.getTable()); this.conf = this.master.getConfiguration(); this.fs = this.master.getMasterFileSystem().getFileSystem(); @@ -160,6 +163,7 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " " + eventType + " on table " + snapshotTable; LOG.info(msg); + snapshotManager.acquireLock(); status.setStatus(msg); try { // If regions move after this meta scan, the region specific snapshot should fail, triggering @@ -228,6 +232,7 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh } catch (IOException e) { LOG.error("Couldn't delete snapshot working directory:" + workingDir); } + snapshotManager.releaseLock(); releaseTableLock(); } } -- 1.9.3 (Apple Git-50)