diff --git a/src/main/java/org/apache/hadoop/hbase/BaseConfigurable.java b/src/main/java/org/apache/hadoop/hbase/BaseConfigurable.java new file mode 100644 index 0000000..36efb50 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/BaseConfigurable.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; + +/** + * HBase version of Hadoop's Configured class that doesn't initialize the + * configuration via {@link #setConf(Configuration)} in the constructor, but + * only sets the configuration through the {@link #setConf(Configuration)} + * method + */ +public class BaseConfigurable implements Configurable { + + private Configuration conf; + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return this.conf; + } +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/Chore.java b/src/main/java/org/apache/hadoop/hbase/Chore.java index 38f476b..547eeec 100644 --- a/src/main/java/org/apache/hadoop/hbase/Chore.java +++ b/src/main/java/org/apache/hadoop/hbase/Chore.java @@ -78,6 +78,7 @@ public abstract class Chore extends HasThread { LOG.fatal(getName() + "error", t); } finally { LOG.info(getName() + " exiting"); + cleanup(); } } @@ -110,4 +111,11 @@ public abstract class Chore extends HasThread { protected void sleep() { this.sleeper.sleep(); } + + /** + * Called when the chore has completed, allowing subclasses to cleanup any + * extra overhead + */ + protected void cleanup() { + } } diff --git a/src/main/java/org/apache/hadoop/hbase/HConstants.java b/src/main/java/org/apache/hadoop/hbase/HConstants.java index 25f5e15..2853f86 100644 --- a/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -660,8 +660,10 @@ public final class HConstants { public static final int HIGH_QOS = 100; public static final int REPLICATION_QOS = 5; // normal_QOS < replication_QOS < high_QOS + /** Configuration key for the directory to backup HFiles for a table */ + public static final String HFILE_ARCHIVE_DIRECTORY = "hbase.table.archive.directory"; private HConstants() { // Can't be instantiated with this ctor. } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java b/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java new file mode 100644 index 0000000..ed787e4 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java @@ -0,0 +1,625 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.backup; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; +import org.apache.hadoop.io.MultipleIOException; + +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.Collections2; +import com.google.common.collect.Lists; + +/** + * Utility class to handle the removal of HFiles (or the respective {@link StoreFile StoreFiles}) + * for a HRegion from the {@link FileSystem}. The hfiles will be archived or deleted, depending on + * the state of the system. + */ +public class HFileArchiver { + private static final Log LOG = LogFactory.getLog(HFileArchiver.class); + private static final String SEPARATOR = "."; + + private HFileArchiver() { + // hidden ctor since this is just a util + } + + /** + * Cleans up all the files for a HRegion by archiving the HFiles to the + * archive directory + * @param fs the file system object + * @param info HRegionInfo for region to be deleted + * @throws IOException + */ + public static void archiveRegion(FileSystem fs, HRegionInfo info) + throws IOException { + Path rootDir = FSUtils.getRootDir(fs.getConf()); + archiveRegion(fs, rootDir, HTableDescriptor.getTableDir(rootDir, info.getTableName()), + HRegion.getRegionDir(rootDir, info)); + } + + + /** + * Remove an entire region from the table directory via archiving the region's hfiles. + * @param fs {@link FileSystem} from which to remove the region + * @param rootdir {@link Path} to the root directory where hbase files are stored (for building + * the archive path) + * @param tableDir {@link Path} to where the table is being stored (for building the archive path) + * @param regionDir {@link Path} to where a region is being stored (for building the archive path) + * @return true if the region was sucessfully deleted. false if the filesystem + * operations could not complete. 
+ * @throws IOException if the request cannot be completed + */ + public static boolean archiveRegion(FileSystem fs, Path rootdir, Path tableDir, Path regionDir) + throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("ARCHIVING region " + regionDir.toString()); + } + + // otherwise, we archive the files + // make sure we can archive + if (tableDir == null || regionDir == null) { + LOG.error("No archive directory could be found because tabledir (" + tableDir + + ") or regiondir (" + regionDir + "was null. Deleting files instead."); + deleteRegionWithoutArchiving(fs, regionDir); + // we should have archived, but failed to. Doesn't matter if we deleted + // the archived files correctly or not. + return false; + } + + // make sure the regiondir lives under the tabledir + Preconditions.checkArgument(regionDir.toString().startsWith(tableDir.toString())); + Path regionArchiveDir = HFileArchiveUtil.getRegionArchiveDir(fs.getConf(), tableDir, regionDir); + + LOG.debug("Have an archive directory, preparing to move files"); + FileStatusConverter getAsFile = new FileStatusConverter(fs); + // otherwise, we attempt to archive the store files + + // build collection of just the store directories to archive + Collection toArchive = new ArrayList(); + final PathFilter dirFilter = new FSUtils.DirFilter(fs); + PathFilter nonHidden = new PathFilter() { + @Override + public boolean accept(Path file) { + return dirFilter.accept(file) && !file.getName().toString().startsWith("."); + } + }; + FileStatus[] storeDirs = FSUtils.listStatus(fs, regionDir, nonHidden); + // if there no files, we can just delete the directory and return; + if (storeDirs == null) { + LOG.debug("Region directory (" + regionDir + ") was empty, just deleting and returning!"); + return deleteRegionWithoutArchiving(fs, regionDir); + } + + // convert the files in the region to a File + toArchive.addAll(Lists.transform(Arrays.asList(storeDirs), getAsFile)); + LOG.debug("Archiving:" + toArchive); + boolean success = false; + try { + success = resolveAndArchive(fs, regionArchiveDir, toArchive); + } catch (IOException e) { + success = false; + } + + // if that was successful, then we delete the region + if (success) { + LOG.debug("Successfully resolved and archived, now can just delete region."); + return deleteRegionWithoutArchiving(fs, regionDir); + } + + throw new IOException("Received error when attempting to archive files (" + toArchive + + "), cannot delete region directory."); + } + + /** + * Remove the store files, either by archiving them or outright deletion + * @param fs the filesystem where the store files live + * @param parent Parent region hosting the store files + * @param conf {@link Configuration} to examine to determine the archive directory + * @param family the family hosting the store files + * @param compactedFiles files to be disposed of. No further reading of these files should be + * attempted; otherwise likely to cause an {@link IOException} + * @throws IOException if the files could not be correctly disposed. 
+ */ + public static void archiveStoreFiles(FileSystem fs, HRegion parent, + Configuration conf, byte[] family, Collection compactedFiles) throws IOException { + + // sometimes in testing, we don't have rss, so we need to check for that + if (fs == null) { + LOG.warn("Passed filesystem is null, so just deleting the files without archiving for region:" + + Bytes.toString(parent.getRegionName()) + ", family:" + Bytes.toString(family)); + deleteStoreFilesWithoutArchiving(compactedFiles); + return; + } + + // short circuit if we don't have any files to delete + if (compactedFiles.size() == 0) { + LOG.debug("No store files to dispose, done!"); + return; + } + + // build the archive path + if (parent == null || family == null) throw new IOException( + "Need to have a parent region and a family to archive from."); + + Path storeArchiveDir = HFileArchiveUtil.getStoreArchivePath(conf, parent, family); + + // make sure we don't archive if we can't and that the archive dir exists + if (!fs.mkdirs(storeArchiveDir)) { + throw new IOException("Could not make archive directory (" + storeArchiveDir + ") for store:" + + Bytes.toString(family) + ", deleting compacted files instead."); + } + + // otherwise we attempt to archive the store files + LOG.debug("Archiving compacted store files."); + + // wrap the storefile into a File + StoreToFile getStorePath = new StoreToFile(fs); + Collection storeFiles = Collections2.transform(compactedFiles, getStorePath); + + // do the actual archive + if (!resolveAndArchive(fs, storeArchiveDir, storeFiles)) { + throw new IOException("Failed to archive/delete all the files for region:" + + Bytes.toString(parent.getRegionName()) + ", family:" + Bytes.toString(family) + + " into " + storeArchiveDir + "Something is probably arwy on the filesystem."); + } + } + + /** + * Archive the given files and resolve any conflicts with existing files via appending the time + * archiving started (so all conflicts in the same group have the same timestamp appended). + *

+ * If any of the passed files to archive are directories, archives the all files under that + * directory. Archive directory structure for children is the base archive directory name + the + * parent directory and is built recursively is passed files are directories themselves. + * @param fs {@link FileSystem} on which to archive the files + * @param baseArchiveDir base archive directory to archive the given files + * @param toArchive files to be archived + * @return true on success, false otherwise + * @throws IOException on unexpected failure + */ + private static boolean resolveAndArchive(FileSystem fs, Path baseArchiveDir, + Collection toArchive) throws IOException { + LOG.debug("Starting to archive files:" + toArchive); + long start = EnvironmentEdgeManager.currentTimeMillis(); + List failures = resolveAndArchive(fs, baseArchiveDir, toArchive, start); + + // clean out the failures by just deleting them + if (failures.size() > 0) { + try { + LOG.error("Failed to complete archive, deleting extra store files."); + deleteFilesWithoutArchiving(failures); + } catch (IOException e) { + LOG.warn("Failed to delete store file(s) when archiving failed", e); + } + return false; + } + return true; + } + + /** + * Resolve any conflict with an existing archive file via timestamp-append + * renaming of the existing file and then archive the passed in files. + * @param fs {@link FileSystem} on which to archive the files + * @param baseArchiveDir base archive directory to store the files. If any of + * the files to archive are directories, will append the name of the + * directory to the base archive directory name, creating a parallel + * structure. + * @param toArchive files/directories that need to be archvied + * @param start time the archiving started - used for resolving archive + * conflicts. + * @return the list of failed to archive files. 
+ * @throws IOException if an unexpected file operation exception occured + */ + private static List resolveAndArchive(FileSystem fs, Path baseArchiveDir, + Collection toArchive, long start) throws IOException { + // short circuit if no files to move + if (toArchive.size() == 0) return Collections.emptyList(); + + LOG.debug("moving files to the archive directory: " + baseArchiveDir); + + // make sure the archive directory exists + if (!fs.exists(baseArchiveDir)) { + if (!fs.mkdirs(baseArchiveDir)) { + throw new IOException("Failed to create the archive directory:" + baseArchiveDir + + ", quitting archive attempt."); + } + LOG.debug("Created archive directory:" + baseArchiveDir); + } + + List failures = new ArrayList(); + String startTime = Long.toString(start); + for (File file : toArchive) { + // if its a file archive it + try { + LOG.debug("Archiving:" + file); + if (file.isFile()) { + // attempt to archive the file + if (!resolveAndArchiveFile(baseArchiveDir, file, startTime)) { + LOG.warn("Couldn't archive " + file + " into backup directory: " + baseArchiveDir); + failures.add(file); + } + } else { + // otherwise its a directory and we need to archive all files + LOG.debug(file + " is a directory, archiving children files"); + // so we add the directory name to the one base archive + Path parentArchiveDir = new Path(baseArchiveDir, file.getName()); + // and then get all the files from that directory and attempt to + // archive those too + Collection children = file.getChildren(); + failures.addAll(resolveAndArchive(fs, parentArchiveDir, children, start)); + } + } catch (IOException e) { + LOG.warn("Failed to archive file: " + file, e); + failures.add(file); + } + } + return failures; + } + + /** + * Attempt to archive the passed in file to the archive directory. + *

+ * If the same file already exists in the archive, it is moved to a timestamped directory under + * the archive directory and the new file is put in its place. + * @param archiveDir {@link Path} to the directory that stores the archives of the hfiles + * @param currentFile {@link Path} to the original HFile that will be archived + * @param archiveStartTime time the archiving started, to resolve naming conflicts + * @return true if the file is successfully archived. false if there was a + * problem, but the operation still completed. + * @throws IOException on failure to complete {@link FileSystem} operations. + */ + private static boolean resolveAndArchiveFile(Path archiveDir, File currentFile, + String archiveStartTime) throws IOException { + // build path as it should be in the archive + String filename = currentFile.getName(); + Path archiveFile = new Path(archiveDir, filename); + FileSystem fs = currentFile.getFileSystem(); + + // if the file already exists in the archive, move that one to a timestamped backup. This is a + // really, really unlikely situtation, where we get the same name for the existing file, but + // is included just for that 1 in trillion chance. + if (fs.exists(archiveFile)) { + if (LOG.isDebugEnabled()) { + LOG.debug("File:" + archiveFile + " already exists in archive, moving to " + + "timestamped backup and overwriting current."); + } + + // move the archive file to the stamped backup + Path backedupArchiveFile = new Path(archiveDir, filename + SEPARATOR + archiveStartTime); + if (!fs.rename(archiveFile, backedupArchiveFile)) { + LOG.error("Could not rename archive file to backup: " + backedupArchiveFile + + ", deleting existing file in favor of newer."); + // try to delete the exisiting file, if we can't rename it + if (!fs.delete(archiveFile, false)) { + throw new IOException("Couldn't delete existing archive file (" + archiveFile + + ") or rename it to the backup file (" + backedupArchiveFile + + ")to make room for similarly named file."); + } + } + LOG.debug("Backed up archive file from: " + archiveFile); + } + + LOG.debug("No existing file in archive for:" + archiveFile + ", free to archive original file."); + + // at this point, we should have a free spot for the archive file + if (currentFile.moveAndClose(archiveFile)) { + LOG.error("Failed to archive file:" + currentFile); + return false; + } else if (LOG.isDebugEnabled()) { + LOG.debug("Finished archiving file from: " + currentFile + ", to: " + archiveFile); + } + return true; + } + + /** + * Simple delete of regular files from the {@link FileSystem}. + *

+ * This method is a more generic implementation that the other deleteXXX + * methods in this class, allowing more code reuse at the cost of a couple + * more, short-lived objects (which should have minimum impact on the jvm). + * @param fs {@link FileSystem} where the files live + * @param files {@link Collection} of files to be deleted + * @throws IOException if a file cannot be deleted. All files will be + * attempted to deleted before throwing the exception, rather than + * failing at the first file. + */ + private static void deleteFilesWithoutArchiving(Collection files) throws IOException { + List errors = new ArrayList(0); + for (File file : files) { + try { + LOG.debug("Deleting region file:" + file); + file.delete(); + } catch (IOException e) { + LOG.error("Failed to delete file:" + file); + errors.add(e); + } + } + if (errors.size() > 0) { + throw MultipleIOException.createIOException(errors); + } + } + + /** + * Without regard for backup, delete a region. Should be used with caution. + * @param regionDir {@link Path} to the region to be deleted. + * @param fs FileSystem from which to delete the region + * @return true on successful deletion, false otherwise + * @throws IOException on filesystem operation failure + */ + private static boolean deleteRegionWithoutArchiving(FileSystem fs, Path regionDir) + throws IOException { + if (fs.delete(regionDir, true)) { + LOG.debug("Deleted all region files in: " + regionDir); + return true; + } + LOG.debug("Failed to delete region directory:" + regionDir); + return false; + } + + /** + * Just do a simple delete of the given store files + *

+ * A best effort is made to delete each of the files, rather than bailing on the first failure. + *

+ * This method is preferable to {@link #deleteFilesWithoutArchiving(Collection)} since it consumes + * less resources, but is limited in terms of usefulness + * @param compactedFiles store files to delete from the file system. + * @throws IOException if a file cannot be deleted. All files will be attempted to deleted before + * throwing the exception, rather than failing at the first file. + */ + private static void deleteStoreFilesWithoutArchiving(Collection compactedFiles) + throws IOException { + LOG.debug("Deleting store files without archiving."); + List errors = new ArrayList(0); + for (StoreFile hsf : compactedFiles) { + try { + hsf.deleteReader(); + } catch (IOException e) { + LOG.error("Failed to delete store file:" + hsf.getPath()); + errors.add(e); + } + } + if (errors.size() > 0) { + throw MultipleIOException.createIOException(errors); + } + } + + /** + * Adapt a type to match the {@link File} interface, which is used internally for handling + * archival/removal of files + * @param type to adapt to the {@link File} interface + */ + private static abstract class FileConverter implements Function { + protected final FileSystem fs; + + public FileConverter(FileSystem fs) { + this.fs = fs; + } + } + + /** + * Convert a FileStatus to something we can manage in the archiving + */ + private static class FileStatusConverter extends FileConverter { + public FileStatusConverter(FileSystem fs) { + super(fs); + } + + @Override + public File apply(FileStatus input) { + return new FileablePath(fs, input.getPath()); + } + } + + /** + * Convert the {@link StoreFile} into something we can manage in the archive + * methods + */ + private static class StoreToFile extends FileConverter { + public StoreToFile(FileSystem fs) { + super(fs); + } + + @Override + public File apply(StoreFile input) { + return new FileableStoreFile(fs, input); + } + } + + /** + * Wrapper to handle file operations uniformly + */ + private static abstract class File { + protected final FileSystem fs; + + public File(FileSystem fs) { + this.fs = fs; + } + + /** + * Delete the file + * @throws IOException on failure + */ + abstract void delete() throws IOException; + + /** + * Check to see if this is a file or a directory + * @return true if it is a file, false otherwise + * @throws IOException on {@link FileSystem} connection error + */ + abstract boolean isFile() throws IOException; + + /** + * @return if this is a directory, returns all the children in the + * directory, otherwise returns an empty list + * @throws IOException + */ + abstract Collection getChildren() throws IOException; + + /** + * close any outside readers of the file + * @throws IOException + */ + abstract void close() throws IOException; + + /** + * @return the name of the file (not the full fs path, just the individual + * file name) + */ + abstract String getName(); + + /** + * @return the path to this file + */ + abstract Path getPath(); + + /** + * Move the file to the given destination + * @param dest + * @return true on success + * @throws IOException + */ + public boolean moveAndClose(Path dest) throws IOException { + this.close(); + Path p = this.getPath(); + return !fs.rename(p, dest); + } + + /** + * @return the {@link FileSystem} on which this file resides + */ + public FileSystem getFileSystem() { + return this.fs; + } + + @Override + public String toString() { + return this.getClass() + ", file:" + getPath().toString(); + } + } + + /** + * A {@link File} that wraps a simple {@link Path} on a {@link FileSystem}. 
+ */ + private static class FileablePath extends File { + private final Path file; + private final FileStatusConverter getAsFile; + + public FileablePath(FileSystem fs, Path file) { + super(fs); + this.file = file; + this.getAsFile = new FileStatusConverter(fs); + } + + @Override + public void delete() throws IOException { + if (!fs.delete(file, true)) throw new IOException("Failed to delete:" + this.file); + } + + @Override + public String getName() { + return file.getName(); + } + + @Override + public Collection getChildren() throws IOException { + if (fs.isFile(file)) return Collections.emptyList(); + return Collections2.transform(Arrays.asList(fs.listStatus(file)), getAsFile); + } + + @Override + public boolean isFile() throws IOException { + return fs.isFile(file); + } + + @Override + public void close() throws IOException { + // NOOP - files are implicitly closed on removal + } + + @Override + Path getPath() { + return file; + } + } + + /** + * {@link File} adapter for a {@link StoreFile} living on a {@link FileSystem} + * . + */ + private static class FileableStoreFile extends File { + StoreFile file; + + public FileableStoreFile(FileSystem fs, StoreFile store) { + super(fs); + this.file = store; + } + + @Override + public void delete() throws IOException { + file.deleteReader(); + } + + @Override + public String getName() { + return file.getPath().getName(); + } + + @Override + public boolean isFile() { + return true; + } + + @Override + public Collection getChildren() throws IOException { + // storefiles don't have children + return Collections.emptyList(); + } + + @Override + public void close() throws IOException { + file.closeReader(true); + } + + @Override + Path getPath() { + return file.getPath(); + } + } +} diff --git a/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index c3fcb99..53a4358 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -39,11 +39,10 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.Server; -import org.apache.hadoop.hbase.TableExistsException; +import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.util.Bytes; @@ -51,7 +50,6 @@ import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Writables; - /** * A janitor for the catalog tables. Scans the .META. catalog * table on a period looking for unused regions to garbage collect. 
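For context, a minimal sketch of how callers might use the HFileArchiver utility introduced above; the wrapper class and its method names are assumptions for illustration and are not part of this patch, but the HFileArchiver calls mirror the signatures added above.

import java.io.IOException;
import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;

public class ArchiverUsageSketch {

  /** Archive (rather than delete) every hfile belonging to a region. */
  static void removeRegion(FileSystem fs, HRegionInfo region) throws IOException {
    HFileArchiver.archiveRegion(fs, region);
  }

  /** Archive the store files that a compaction has replaced. */
  static void removeCompactedFiles(FileSystem fs, HRegion region, Configuration conf,
      byte[] family, Collection<StoreFile> replaced) throws IOException {
    HFileArchiver.archiveStoreFiles(fs, region, conf, family, replaced);
  }
}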
@@ -237,7 +235,7 @@ class CatalogJanitor extends Chore { if (hasNoReferences(a) && hasNoReferences(b)) { LOG.debug("Deleting region " + parent.getRegionNameAsString() + " because daughter splits no longer hold references"); - // wipe out daughter references from parent region + // wipe out daughter references from parent region in meta removeDaughtersFromParent(parent); // This latter regionOffline should not be necessary but is done for now @@ -248,8 +246,7 @@ class CatalogJanitor extends Chore { this.services.getAssignmentManager().regionOffline(parent); } FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); - Path rootdir = this.services.getMasterFileSystem().getRootDir(); - HRegion.deleteRegion(fs, rootdir, parent); + HFileArchiver.archiveRegion(fs, parent); MetaEditor.deleteRegion(this.server.getCatalogTracker(), parent); result = true; } diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 186c896..b476638 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -44,6 +44,7 @@ import javax.management.ObjectName; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HColumnDescriptor; @@ -75,6 +76,8 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface; import org.apache.hadoop.hbase.ipc.HMasterRegionInterface; import org.apache.hadoop.hbase.ipc.ProtocolSignature; import org.apache.hadoop.hbase.ipc.RpcServer; +import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; +import org.apache.hadoop.hbase.master.cleaner.LogCleaner; import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.master.handler.DeleteTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; @@ -93,6 +96,7 @@ import org.apache.hadoop.hbase.replication.regionserver.Replication; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.HasThread; import org.apache.hadoop.hbase.util.InfoServer; import org.apache.hadoop.hbase.util.Pair; @@ -203,6 +207,7 @@ Server { private CatalogJanitor catalogJanitorChore; private LogCleaner logCleaner; + private HFileCleaner hfileCleaner; private MasterCoprocessorHost cpHost; private final ServerName serverName; @@ -834,12 +839,19 @@ Server { // Start log cleaner thread String n = Thread.currentThread().getName(); + int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000); this.logCleaner = - new LogCleaner(conf.getInt("hbase.master.cleaner.interval", 60 * 1000), + new LogCleaner(cleanerInterval, this, conf, getMasterFileSystem().getFileSystem(), getMasterFileSystem().getOldLogDir()); Threads.setDaemonThreadRunning(logCleaner.getThread(), n + ".oldLogCleaner"); + //start the hfile archive cleaner thread + Path archiveDir = HFileArchiveUtil.getArchivePath(conf); + this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem() + .getFileSystem(), archiveDir); + Threads.setDaemonThreadRunning(hfileCleaner.getThread(), n + ".archivedHFileCleaner"); + // Put up info server. 
int port = this.conf.getInt("hbase.master.info.port", 60010); if (port >= 0) { @@ -866,6 +878,8 @@ Server { if (this.rpcServer != null) this.rpcServer.stop(); // Clean up and close up shop if (this.logCleaner!= null) this.logCleaner.interrupt(); + if (this.hfileCleaner != null) this.hfileCleaner.interrupt(); + if (this.infoServer != null) { LOG.info("Stopping infoServer"); try { @@ -1815,4 +1829,12 @@ Server { MBeanUtil.registerMBean("Master", "Master", mxBeanInfo); LOG.info("Registered HMaster MXBean"); } -} + + /** + * Exposed for Testing! + * @return the current hfile cleaner + */ + public HFileCleaner getHFileCleaner() { + return this.hfileCleaner; + } +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/master/LogCleaner.java b/src/main/java/org/apache/hadoop/hbase/master/LogCleaner.java deleted file mode 100644 index b5fc665..0000000 --- a/src/main/java/org/apache/hadoop/hbase/master/LogCleaner.java +++ /dev/null @@ -1,165 +0,0 @@ -/** - * Copyright 2010 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master; - -import java.io.IOException; -import java.util.LinkedList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.Chore; -import org.apache.hadoop.hbase.RemoteExceptionHandler; -import org.apache.hadoop.hbase.Stoppable; -import org.apache.hadoop.hbase.regionserver.wal.HLog; -import org.apache.hadoop.hbase.util.FSUtils; - -import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS; - -/** - * This Chore, everytime it runs, will clear the HLogs in the old logs folder - * that are deletable for each log cleaner in the chain. - */ -public class LogCleaner extends Chore { - static final Log LOG = LogFactory.getLog(LogCleaner.class.getName()); - - private final FileSystem fs; - private final Path oldLogDir; - private List logCleanersChain; - private final Configuration conf; - - /** - * - * @param p the period of time to sleep between each run - * @param s the stopper - * @param conf configuration to use - * @param fs handle to the FS - * @param oldLogDir the path to the archived logs - */ - public LogCleaner(final int p, final Stoppable s, - Configuration conf, FileSystem fs, - Path oldLogDir) { - super("LogsCleaner", p, s); - this.fs = fs; - this.oldLogDir = oldLogDir; - this.conf = conf; - this.logCleanersChain = new LinkedList(); - - initLogCleanersChain(); - } - - /* - * Initialize the chain of log cleaners from the configuration. 
The default - * in this chain are: TimeToLiveLogCleaner and ReplicationLogCleaner. - */ - private void initLogCleanersChain() { - String[] logCleaners = conf.getStrings(HBASE_MASTER_LOGCLEANER_PLUGINS); - if (logCleaners != null) { - for (String className : logCleaners) { - LogCleanerDelegate logCleaner = newLogCleaner(className, conf); - addLogCleaner(logCleaner); - } - } - } - - /** - * A utility method to create new instances of LogCleanerDelegate based - * on the class name of the LogCleanerDelegate. - * @param className fully qualified class name of the LogCleanerDelegate - * @param conf - * @return the new instance - */ - public static LogCleanerDelegate newLogCleaner(String className, Configuration conf) { - try { - Class c = Class.forName(className); - LogCleanerDelegate cleaner = (LogCleanerDelegate) c.newInstance(); - cleaner.setConf(conf); - return cleaner; - } catch(Exception e) { - LOG.warn("Can NOT create LogCleanerDelegate: " + className, e); - // skipping if can't instantiate - return null; - } - } - - /** - * Add a LogCleanerDelegate to the log cleaner chain. A log file is deletable - * if it is deletable for each LogCleanerDelegate in the chain. - * @param logCleaner - */ - public void addLogCleaner(LogCleanerDelegate logCleaner) { - if (logCleaner != null && !logCleanersChain.contains(logCleaner)) { - logCleanersChain.add(logCleaner); - LOG.debug("Add log cleaner in chain: " + logCleaner.getClass().getName()); - } - } - - @Override - protected void chore() { - try { - FileStatus [] files = FSUtils.listStatus(this.fs, this.oldLogDir, null); - if (files == null) return; - FILE: for (FileStatus file : files) { - Path filePath = file.getPath(); - if (HLog.validateHLogFilename(filePath.getName())) { - for (LogCleanerDelegate logCleaner : logCleanersChain) { - if (logCleaner.isStopped()) { - LOG.warn("A log cleaner is stopped, won't delete any log."); - return; - } - - if (!logCleaner.isLogDeletable(filePath) ) { - // this log is not deletable, continue to process next log file - continue FILE; - } - } - // delete this log file if it passes all the log cleaners - this.fs.delete(filePath, true); - } else { - LOG.warn("Found a wrongly formated file: " - + file.getPath().getName()); - this.fs.delete(filePath, true); - } - } - } catch (IOException e) { - e = RemoteExceptionHandler.checkIOException(e); - LOG.warn("Error while cleaning the logs", e); - } - } - - @Override - public void run() { - try { - super.run(); - } finally { - for (LogCleanerDelegate lc: this.logCleanersChain) { - try { - lc.stop("Exiting"); - } catch (Throwable t) { - LOG.warn("Stopping", t); - } - } - } - } -} diff --git a/src/main/java/org/apache/hadoop/hbase/master/LogCleanerDelegate.java b/src/main/java/org/apache/hadoop/hbase/master/LogCleanerDelegate.java deleted file mode 100644 index 3c8eb0a..0000000 --- a/src/main/java/org/apache/hadoop/hbase/master/LogCleanerDelegate.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2010 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.Stoppable; - -/** - * Interface for the log cleaning function inside the master. By default, two - * cleaners: TimeToLiveLogCleaner and - * ReplicationLogCleaner are called in order. So if other - * effects are needed, implement your own LogCleanerDelegate and add it to the - * configuration "hbase.master.logcleaner.plugins", which is a comma-separated - * list of fully qualified class names. LogsCleaner will add it to the chain. - * - *

HBase ships with LogsCleaner as the default implementation. - * - *

This interface extends Configurable, so setConf needs to be called once - * before using the cleaner. - * Since LogCleanerDelegates are created in LogsCleaner by reflection. Classes - * that implements this interface should provide a default constructor. - */ -public interface LogCleanerDelegate extends Configurable, Stoppable { - /** - * Should the master delete the log or keep it? - * @param filePath full path to log. - * @return true if the log is deletable, false if not - */ - public boolean isLogDeletable(Path filePath); -} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index 7de0136..3552c22 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.InvalidFamilyOperationException; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.wal.HLog; @@ -440,7 +441,7 @@ public class MasterFileSystem { public void deleteRegion(HRegionInfo region) throws IOException { - fs.delete(HRegion.getRegionDir(rootdir, region), true); + HFileArchiver.archiveRegion(fs, region); } public void deleteTable(byte[] tableName) throws IOException { diff --git a/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java b/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java deleted file mode 100644 index dde8207..0000000 --- a/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2010 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master; - -import java.io.IOException; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.conf.Configuration; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -/** - * Log cleaner that uses the timestamp of the hlog to determine if it should - * be deleted. By default they are allowed to live for 10 minutes. 
- */ -public class TimeToLiveLogCleaner implements LogCleanerDelegate { - static final Log LOG = LogFactory.getLog(TimeToLiveLogCleaner.class.getName()); - private Configuration conf; - // Configured time a log can be kept after it was closed - private long ttl; - private boolean stopped = false; - - @Override - public boolean isLogDeletable(Path filePath) { - long time = 0; - long currentTime = System.currentTimeMillis(); - try { - FileStatus fStat = filePath.getFileSystem(conf).getFileStatus(filePath); - time = fStat.getModificationTime(); - } catch (IOException e) { - LOG.error("Unable to get modification time of file " + filePath.getName() + - ", not deleting it.", e); - return false; - } - long life = currentTime - time; - if (life < 0) { - LOG.warn("Found a log newer than current time, " + - "probably a clock skew"); - return false; - } - return life > ttl; - } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - this.ttl = conf.getLong("hbase.master.logcleaner.ttl", 600000); - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void stop(String why) { - this.stopped = true; - } - - @Override - public boolean isStopped() { - return this.stopped; - } -} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseHFileCleanerDelegate.java b/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseHFileCleanerDelegate.java new file mode 100644 index 0000000..ed890ac --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseHFileCleanerDelegate.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.BaseConfigurable; + +/** + * Base class for the hfile cleaning function inside the master. By default, only the + * {@link TimeToLiveHFileCleaner} is called. + *

+ * If other effects are needed, implement your own LogCleanerDelegate and add it to the + * configuration "hbase.master.hfilecleaner.plugins", which is a comma-separated list of fully + * qualified class names. The HFileCleaner will build the cleaner chain in + * order the order specified by the configuration. + *
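As a sketch of the plugin contract just described (the class below is hypothetical and not shipped with this patch), a delegate needs only a default constructor and an isFileDeleteable implementation; it is then listed under "hbase.master.hfilecleaner.plugins" so the HFileCleaner chore can instantiate it by reflection.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;

/** Hypothetical delegate: keep any archived hfile whose name ends with ".keep". */
public class KeepMarkedHFileCleaner extends BaseHFileCleanerDelegate {
  @Override
  public boolean isFileDeleteable(Path file) {
    // Anything not explicitly marked is fair game for deletion.
    return !file.getName().endsWith(".keep");
  }
}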

+ * For subclasses, setConf will be called exactly once before using the cleaner. + *

+ * Since {@link BaseHFileCleanerDelegate HFileCleanerDelegates} are created in + * HFileCleaner by reflection, classes that implements this interface must + * provide a default constructor. + */ +@InterfaceAudience.Private +public abstract class BaseHFileCleanerDelegate extends BaseConfigurable implements + FileCleanerDelegate { + + private boolean stopped = false; + + @Override + public void stop(String why) { + this.stopped = true; + } + + @Override + public boolean isStopped() { + return this.stopped; + } +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseLogCleanerDelegate.java b/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseLogCleanerDelegate.java new file mode 100644 index 0000000..c02a6bd --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseLogCleanerDelegate.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.BaseConfigurable; + +/** + * Base class for the log cleaning function inside the master. By default, two + * cleaners: TimeToLiveLogCleaner and + * ReplicationLogCleaner are called in order. So if other effects + * are needed, implement your own LogCleanerDelegate and add it to the + * configuration "hbase.master.logcleaner.plugins", which is a comma-separated + * list of fully qualified class names. LogsCleaner will add it to the chain. + *

+ * HBase ships with LogsCleaner as the default implementation. + *
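For illustration only, the log cleaner chain is wired up purely through configuration; the custom delegate class name below is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class LogCleanerPluginConfigSketch {
  static Configuration withCustomLogCleaner() {
    Configuration conf = HBaseConfiguration.create();
    // Comma-separated, fully qualified delegate class names read by the LogCleaner chore.
    conf.set("hbase.master.logcleaner.plugins", "org.example.MyCustomLogCleaner");
    return conf;
  }
}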

+ * This interface extends Configurable, so setConf needs to be called once + * before using the cleaner. Since LogCleanerDelegates are created in + * LogsCleaner by reflection. Classes that implements this interface should + * provide a default constructor. + */ +@InterfaceAudience.Private +public abstract class BaseLogCleanerDelegate extends BaseConfigurable implements FileCleanerDelegate { + + @Override + public boolean isFileDeleteable(Path file) { + return isLogDeletable(file); + } + + /** + * Should the master delete the log or keep it? + *

+ * Implementing classes should override {@link #isFileDeleteable(Path)} instead. + * @param filePath full path to log. + * @return true if the log is deletable, false if not + */ + @Deprecated + public abstract boolean isLogDeletable(Path filePath); +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java new file mode 100644 index 0000000..c5bc4e3 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Chore; +import org.apache.hadoop.hbase.RemoteExceptionHandler; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * Abstract Cleaner that uses a chain of delegates to clean a directory of files + * @param Cleaner delegate class that is dynamically loaded from configuration + */ +public abstract class CleanerChore extends Chore { + + private static final Log LOG = LogFactory.getLog(CleanerChore.class.getName()); + + private final FileSystem fs; + private final Path oldFileDir; + private final Configuration conf; + private List cleanersChain; + + /** + * @param name name of the chore being run + * @param sleepPeriod the period of time to sleep between each run + * @param s the stopper + * @param conf configuration to use + * @param fs handle to the FS + * @param oldFileDir the path to the archived files + * @param confKey configuration key for the classes to instantiate + */ + public CleanerChore(String name, final int sleepPeriod, final Stoppable s, Configuration conf, + FileSystem fs, Path oldFileDir, String confKey) { + super(name, sleepPeriod, s); + this.fs = fs; + this.oldFileDir = oldFileDir; + this.conf = conf; + + initCleanerChain(confKey); + } + + /** + * Validate the file to see if it even belongs in the directory. If it is valid, then the file + * will go through the cleaner delegates, but otherwise the file is just deleted. 
+ * @param file full {@link Path} of the file to be checked + * @return true if the file is valid, false otherwise + */ + protected abstract boolean validate(Path file); + + /** + * Instanitate and initialize all the file cleaners set in the configuration + * @param confKey key to get the file cleaner classes from the configuration + */ + private void initCleanerChain(String confKey) { + this.cleanersChain = new LinkedList(); + String[] logCleaners = conf.getStrings(confKey); + if (logCleaners != null) { + for (String className : logCleaners) { + T logCleaner = newFileCleaner(className, conf); + if (logCleaner != null) this.cleanersChain.add(logCleaner); + } + } + } + + /** + * A utility method to create new instances of LogCleanerDelegate based on the class name of the + * LogCleanerDelegate. + * @param className fully qualified class name of the LogCleanerDelegate + * @param conf + * @return the new instance + */ + public T newFileCleaner(String className, Configuration conf) { + try { + Class c = Class.forName(className).asSubclass( + FileCleanerDelegate.class); + @SuppressWarnings("unchecked") + T cleaner = (T) c.newInstance(); + cleaner.setConf(conf); + return cleaner; + } catch (Exception e) { + LOG.warn("Can NOT create CleanerDelegate: " + className, e); + // skipping if can't instantiate + return null; + } + } + + @Override + public void chore() { + try { + FileStatus[] files = FSUtils.listStatus(this.fs, this.oldFileDir, null); + // if the path (file or directory) doesn't exist, then we can just return + if (files == null) return; + // loop over the found files and see if they should be deleted + for (FileStatus file : files) { + try { + if (file.isDir()) checkDirectory(file.getPath()); + else checkAndDelete(file.getPath()); + } catch (IOException e) { + e = RemoteExceptionHandler.checkIOException(e); + LOG.warn("Error while cleaning the logs", e); + } + } + } catch (IOException e) { + LOG.warn("Failed to get status of:" + oldFileDir); + } + + } + + /** + * Check to see if we can delete a directory (and all the children files of that directory). + *

+ * A directory will not be deleted if it has children that are subsequently deleted since that + * will require another set of lookups in the filesystem, which is semantically same as waiting + * until the next time the chore is run, so we might as well wait. + * @param fs {@link FileSystem} where he directory resides + * @param toCheck directory to check + * @throws IOException + */ + private void checkDirectory(Path toCheck) throws IOException { + LOG.debug("Checking directory: " + toCheck); + FileStatus[] files = checkAndDeleteDirectory(toCheck); + // if the directory doesn't exist, then we are done + if (files == null) return; + + // otherwise we need to check each of the child files + for (FileStatus file : files) { + Path filePath = file.getPath(); + // if its a directory, then check to see if it should be deleted + if (file.isDir()) { + // check the subfiles to see if they can be deleted + checkDirectory(filePath); + continue; + } + // otherwise we can just check the file + checkAndDelete(filePath); + } + + // recheck the directory to see if we can delete it this time + checkAndDeleteDirectory(toCheck); + } + + /** + * Check and delete the passed directory if the directory is empty + * @param toCheck full path to the directory to check (and possibly delete) + * @return null if the directory was empty (and possibly deleted) and otherwise an array + * of FileStatus for the files in the directory + * @throws IOException + */ + private FileStatus[] checkAndDeleteDirectory(Path toCheck) throws IOException { + LOG.debug("Attempting to delete directory:" + toCheck); + // if it doesn't exist, we are done + if (!fs.exists(toCheck)) return null; + // get the files below the directory + FileStatus[] files = FSUtils.listStatus(fs, toCheck, null); + // if there are no subfiles, then we can delete the directory + if (files == null) { + checkAndDelete(toCheck); + return null; + } + + // return the status of the files in the directory + return files; + } + + /** + * Run the given file through each of the cleaners to see if it should be deleted, deleting it if + * necessary. + * @param filePath path of the file to check (and possibly delete) + * @throws IOException if cann't delete a file because of a filesystem issue + * @throws IllegalArgumentException if the file is a directory and has children + */ + private void checkAndDelete(Path filePath) throws IOException, IllegalArgumentException { + if (!validate(filePath)) { + LOG.warn("Found a wrongly formatted file: " + filePath.getName() + "deleting it."); + if (!this.fs.delete(filePath, true)) { + LOG.warn("Attempted to delete:" + filePath + + ", but couldn't. Run cleaner chain and attempt to delete on next pass."); + } + return; + } + for (T cleaner : cleanersChain) { + if (cleaner.isStopped()) { + LOG.warn("A file cleaner" + this.getName() + " is stopped, won't delete any file in:" + + this.oldFileDir); + return; + } + + if (!cleaner.isFileDeleteable(filePath)) { + // this file is not deletable, then we are done + LOG.debug(filePath + " is not deletable according to:" + cleaner); + return; + } + } + // delete this file if it passes all the cleaners + LOG.debug("Removing:" + filePath + " from archive"); + if (!this.fs.delete(filePath, false)) { + LOG.warn("Attempted to delete:" + filePath + + ", but couldn't. 
Run cleaner chain and attempt to delete on next pass."); + } + } + + + @Override + public void cleanup() { + for (T lc : this.cleanersChain) { + try { + lc.stop("Exiting"); + } catch (Throwable t) { + LOG.warn("Stopping", t); + } + } + } +} diff --git a/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java b/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java new file mode 100644 index 0000000..7494237 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Stoppable; + +/** + * General interface for cleaning files from a folder (generally an archive or + * backup folder). These are chained via the {@link CleanerChore} to determine + * if a given file should be deleted. + */ +@InterfaceAudience.Private +public interface FileCleanerDelegate extends Configurable, Stoppable { + + /** + * Should the master delete the file or keep it? + * @param file full path to the file to check + * @return true if the file is deletable, false if not + */ + public boolean isFileDeleteable(Path file); +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java b/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java new file mode 100644 index 0000000..06a612d --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.cleaner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.regionserver.StoreFile; +/** + * This Chore, every time it runs, will clear the HFiles in the hfile archive + * folder that are deletable for each HFile cleaner in the chain. + */ +@InterfaceAudience.Private +public class HFileCleaner extends CleanerChore { + + public static final String MASTER_HFILE_CLEANER_PLUGINS = "hbase.master.hfilecleaner.plugins"; + + /** + * @param period the period of time to sleep between each run + * @param stopper the stopper + * @param conf configuration to use + * @param fs handle to the FS + * @param directory directory to be cleaned + */ + public HFileCleaner(final int period, final Stoppable stopper, Configuration conf, FileSystem fs, + Path directory) { + super("HFileCleaner", period, stopper, conf, fs, directory, MASTER_HFILE_CLEANER_PLUGINS); + } + + @Override + protected boolean validate(Path file) { + return StoreFile.validateStoreFileName(file.getName()); + } +} diff --git a/src/main/java/org/apache/hadoop/hbase/master/cleaner/LogCleaner.java b/src/main/java/org/apache/hadoop/hbase/master/cleaner/LogCleaner.java new file mode 100644 index 0000000..0e5bd3d --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/cleaner/LogCleaner.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.regionserver.wal.HLog; + +/** + * This Chore, every time it runs, will attempt to delete the HLogs in the old logs folder. The HLog + * is only deleted if none of the cleaner delegates says otherwise. 
+ * @see BaseLogCleanerDelegate + */ +@InterfaceAudience.Private +public class LogCleaner extends CleanerChore { + static final Log LOG = LogFactory.getLog(LogCleaner.class.getName()); + + /** + * @param p the period of time to sleep between each run + * @param s the stopper + * @param conf configuration to use + * @param fs handle to the FS + * @param oldLogDir the path to the archived logs + */ + public LogCleaner(final int p, final Stoppable s, Configuration conf, FileSystem fs, + Path oldLogDir) { + super("LogsCleaner", p, s, conf, fs, oldLogDir, HBASE_MASTER_LOGCLEANER_PLUGINS); + } + + @Override + protected boolean validate(Path file) { + return HLog.validateHLogFilename(file.getName()); + } +} diff --git a/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveHFileCleaner.java b/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveHFileCleaner.java new file mode 100644 index 0000000..1b883a7 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveHFileCleaner.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; + +/** + * HFile cleaner that uses the timestamp of the hfile to determine if it should be deleted. 
By + * default they are allowed to live for {@value TimeToLiveHFileCleaner#DEFAULT_TTL} + */ +@InterfaceAudience.Private +public class TimeToLiveHFileCleaner extends BaseHFileCleanerDelegate { + + public static final Log LOG = LogFactory.getLog(TimeToLiveHFileCleaner.class.getName()); + public static final String TTL_CONF_KEY = "hbase.master.hfilecleaner.ttl"; + // default ttl = 5 minutes + private static final long DEFAULT_TTL = 60000 * 5; + // Configured time an hfile can be kept after it was moved to the archive + private long ttl; + private FileSystem fs; + + @Override + public void setConf(Configuration conf) { + this.ttl = conf.getLong(TTL_CONF_KEY, DEFAULT_TTL); + super.setConf(conf); + } + + @Override + public boolean isFileDeleteable(Path filePath) { + if (!instantiateFS()) { + return false; + } + long time = 0; + long currentTime = EnvironmentEdgeManager.currentTimeMillis(); + try { + FileStatus fStat = fs.getFileStatus(filePath); + time = fStat.getModificationTime(); + } catch (IOException e) { + LOG.error("Unable to get modification time of file " + filePath.getName() + + ", not deleting it.", e); + return false; + } + long life = currentTime - time; + LOG.debug("Life:" + life + ", ttl:" + ttl + ", current:" + currentTime + ", from: " + time); + if (life < 0) { + LOG.warn("Found an hfile (" + filePath + ") newer than current time (" + currentTime + " < " + + time + "), probably a clock skew"); + return false; + } + return life > ttl; + } + + /** + * Set up the filesystem, if it hasn't been already + */ + private synchronized boolean instantiateFS() { + if (this.fs == null) { + try { + this.fs = FileSystem.get(this.getConf()); + } catch (IOException e) { + LOG.error("Couldn't instantiate the file system, not deleting file, just in case"); + return false; + } + } + return true; + } +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveLogCleaner.java b/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveLogCleaner.java new file mode 100644 index 0000000..6043a8a --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveLogCleaner.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import java.io.IOException; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Log cleaner that uses the timestamp of the hlog to determine if it should + * be deleted. By default they are allowed to live for 10 minutes. 
+ */ +@InterfaceAudience.Private +public class TimeToLiveLogCleaner extends BaseLogCleanerDelegate { + static final Log LOG = LogFactory.getLog(TimeToLiveLogCleaner.class.getName()); + // Configured time a log can be kept after it was closed + private long ttl; + private boolean stopped = false; + + @Override + public boolean isLogDeletable(Path filePath) { + long time = 0; + long currentTime = System.currentTimeMillis(); + try { + FileStatus fStat = filePath.getFileSystem(this.getConf()).getFileStatus(filePath); + time = fStat.getModificationTime(); + } catch (IOException e) { + LOG.error("Unable to get modification time of file " + filePath.getName() + + ", not deleting it.", e); + return false; + } + long life = currentTime - time; + if (life < 0) { + LOG.warn("Found a log newer than current time, " + + "probably a clock skew"); + return false; + } + return life > ttl; + } + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + this.ttl = conf.getLong("hbase.master.logcleaner.ttl", 600000); + } + + + @Override + public void stop(String why) { + this.stopped = true; + } + + @Override + public boolean isStopped() { + return this.stopped; + } +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index e8d2c26..8d740b8 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -77,6 +77,7 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.UnknownScannerException; +import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.client.Append; import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Delete; @@ -868,16 +869,7 @@ public class HRegion implements HeapSize { // , Writable{ writestate.writesEnabled = false; wasFlushing = writestate.flushing; LOG.debug("Closing " + this + ": disabling compactions & flushes"); - while (writestate.compacting > 0 || writestate.flushing) { - LOG.debug("waiting for " + writestate.compacting + " compactions" + - (writestate.flushing ? " & cache flush" : "") + - " to complete for region " + this); - try { - writestate.wait(); - } catch (InterruptedException iex) { - // continue - } - } + waitForFlushesAndCompactions(); } // If we were not just flushing, is it worth doing a preflush...one // that will clear out of the bulk of the memstore before we put up @@ -952,6 +944,26 @@ public class HRegion implements HeapSize { // , Writable{ } } + /** + * Wait for all current flushes and compactions of the region to complete. + *

+ * Exposed for TESTING. + */ + public void waitForFlushesAndCompactions() { + synchronized (writestate) { + while (writestate.compacting > 0 || writestate.flushing) { + LOG.debug("waiting for " + writestate.compacting + " compactions" + + (writestate.flushing ? " & cache flush" : "") + " to complete for region " + this); + try { + writestate.wait(); + } catch (InterruptedException iex) { + // essentially ignore and propagate the interrupt back up + Thread.currentThread().interrupt(); + } + } + } + } + protected ThreadPoolExecutor getStoreOpenAndCloseThreadPool( final String threadNamePrefix) { int numStores = Math.max(1, this.htableDescriptor.getFamilies().size()); @@ -4062,8 +4074,13 @@ public class HRegion implements HeapSize { // , Writable{ LOG.debug("Files for new region"); listPaths(fs, dstRegion.getRegionDir()); } - deleteRegion(fs, a.getRegionDir()); - deleteRegion(fs, b.getRegionDir()); + + // delete out the 'A' region + HFileArchiver.archiveRegion(fs, FSUtils.getRootDir(a.getConf()), a.getTableDir(), + a.getRegionDir()); + // delete out the 'B' region + HFileArchiver.archiveRegion(fs, FSUtils.getRootDir(b.getConf()), b.getTableDir(), + b.getRegionDir()); LOG.info("merge completed. New region is " + dstRegion); diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java b/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java index 43ff036..70e63f6 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.RemoteExceptionHandler; +import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.HeapSize; @@ -457,7 +458,7 @@ public class Store extends SchemaConfigured implements HeapSize { /** * @return All store files. */ - List getStorefiles() { + public List getStorefiles() { return this.storefiles; } @@ -1732,10 +1733,12 @@ public class Store extends SchemaConfigured implements HeapSize { // Tell observers that list of StoreFiles has changed. notifyChangedReadersObservers(); - // Finally, delete old store files. - for (StoreFile hsf: compactedFiles) { - hsf.deleteReader(); - } + + // let the archive util decide if we should archive or delete the files + LOG.debug("Removing store files after compaction..."); + HFileArchiver.archiveStoreFiles(this.fs, this.region, this.conf, this.family.getName(), + compactedFiles); + } catch (IOException e) { e = RemoteExceptionHandler.checkIOException(e); LOG.error("Failed replacing compacted files in " + this.storeNameStr + diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index 76566e5..af3ab83 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -861,13 +861,20 @@ public class StoreFile extends SchemaConfigured { } /** - * Write out a split reference. - * - * Package local so it doesnt leak out of regionserver. - * + * Validate the store file name. 
+ * @param fileName name of the file to validate + * @return true if the file could be a valid store file, false otherwise + */ + public static boolean validateStoreFileName(String fileName) { + return !fileName.contains("-"); + } + + /** + * Write out a split reference. Package local so it doesnt leak out of + * regionserver. * @param fs * @param splitDir Presumes path format is actually - * SOME_DIRECTORY/REGIONNAME/FAMILY. + * SOME_DIRECTORY/REGIONNAME/FAMILY. * @param f File to split. * @param splitRow * @param range diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java index 531d049..5665951 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java @@ -1768,6 +1768,11 @@ public class HLog implements Syncable { return dir; } + /** + * @param filename name of the file to validate + * @return true if the filename matches an HLog, false + * otherwise + */ public static boolean validateHLogFilename(String filename) { return pattern.matcher(filename).matches(); } diff --git a/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java b/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java index 05d0310..21a58db 100644 --- a/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java +++ b/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java @@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.client.HConnectionManager; -import org.apache.hadoop.hbase.master.LogCleanerDelegate; +import org.apache.hadoop.hbase.master.cleaner.BaseLogCleanerDelegate; import org.apache.hadoop.hbase.replication.ReplicationZookeeper; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.zookeeper.KeeperException; @@ -40,9 +40,8 @@ import java.util.Set; * Implementation of a log cleaner that checks if a log is still scheduled for * replication before deleting it when its TTL is over. */ -public class ReplicationLogCleaner implements LogCleanerDelegate, Abortable { +public class ReplicationLogCleaner extends BaseLogCleanerDelegate implements Abortable { private static final Log LOG = LogFactory.getLog(ReplicationLogCleaner.class); - private Configuration conf; private ReplicationZookeeper zkHelper; private Set hlogs = new HashSet(); private boolean stopped = false; @@ -67,7 +66,7 @@ public class ReplicationLogCleaner implements LogCleanerDelegate, Abortable { // all members of this class are null if replication is disabled, and we // return true since false would render the LogsCleaner useless - if (this.conf == null) { + if (this.getConf() == null) { return true; } String log = filePath.getName(); @@ -122,18 +121,18 @@ public class ReplicationLogCleaner implements LogCleanerDelegate, Abortable { } @Override - public void setConf(Configuration conf) { + public void setConf(Configuration config) { // If replication is disabled, keep all members null - if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY, false)) { + if (!config.getBoolean(HConstants.REPLICATION_ENABLE_KEY, false)) { return; } // Make my own Configuration. Then I'll have my own connection to zk that // I can close myself when comes time. 
- this.conf = new Configuration(conf); + Configuration conf = new Configuration(config); + super.setConf(conf); try { - ZooKeeperWatcher zkw = - new ZooKeeperWatcher(this.conf, "replicationLogCleaner", null); - this.zkHelper = new ReplicationZookeeper(this, this.conf, zkw); + ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "replicationLogCleaner", null); + this.zkHelper = new ReplicationZookeeper(this, conf, zkw); } catch (KeeperException e) { LOG.error("Error while configuring " + this.getClass().getName(), e); } catch (IOException e) { @@ -142,10 +141,6 @@ public class ReplicationLogCleaner implements LogCleanerDelegate, Abortable { refreshHLogsAndSearch(null); } - @Override - public Configuration getConf() { - return conf; - } @Override public void stop(String why) { @@ -156,7 +151,7 @@ public class ReplicationLogCleaner implements LogCleanerDelegate, Abortable { this.zkHelper.getZookeeperWatcher().close(); } // Not sure why we're deleting a connection that we never acquired or used - HConnectionManager.deleteConnection(this.conf, true); + HConnectionManager.deleteConnection(this.getConf(), true); } @Override diff --git a/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java b/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java index 22f536a..27b61a2 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java +++ b/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java @@ -562,6 +562,10 @@ public abstract class FSUtils { return p.makeQualified(fs); } + public static void setRootDir(final Configuration c, final Path root) throws IOException { + c.set(HConstants.HBASE_DIR, root.toString()); + } + /** * Checks if root region exists * @@ -1187,4 +1191,36 @@ public abstract class FSUtils { public static boolean isExists(final FileSystem fs, final Path path) throws IOException { return fs.exists(path); } + + /** + * Log the current state of the filesystem from a certain root directory + * @param fs filesystem to investigate + * @param root root file/directory to start logging from + * @param LOG log to output information + * @throws IOException if an unexpected exception occurs + */ + public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG) + throws IOException { + LOG.debug("Current file system:"); + logFSTree(LOG, fs, root, "|-"); + } + + /** + * Recursive helper to log the state of the FS + * @see #logFileSystemState(FileSystem, Path, Log) + */ + private static void logFSTree(Log LOG, final FileSystem fs, final Path root, String prefix) + throws IOException { + FileStatus[] files = FSUtils.listStatus(fs, root, null); + if (files == null) return; + + for (FileStatus file : files) { + if (file.isDir()) { + LOG.debug(prefix + file.getPath().getName() + "/"); + logFSTree(LOG, fs, file.getPath(), prefix + "---"); + } else { + LOG.debug(prefix + file.getPath().getName()); + } + } + } } diff --git a/src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java b/src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java new file mode 100644 index 0000000..5d712a7 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.Store; + +/** + * Helper class for all utilities related to archival/retrieval of HFiles + */ +public class HFileArchiveUtil { + static final String DEFAULT_HFILE_ARCHIVE_DIRECTORY = ".archive"; + + private HFileArchiveUtil() { + // non-external instantiation - util class + } + + /** + * Get the directory to archive a store directory + * @param conf {@link Configuration} to read for the archive directory name + * @param region parent region information under which the store currently + * lives + * @param family name of the family in the store + * @return {@link Path} to the directory to archive the given store or + * null if it should not be archived + */ + public static Path getStoreArchivePath(Configuration conf, HRegion region, byte [] family){ + return getStoreArchivePath(conf, region.getRegionInfo(), region.getTableDir(), family); + } + + /** + * Get the directory to archive a store directory + * @param conf {@link Configuration} to read for the archive directory name. Can be null. + * @param region parent region information under which the store currently lives + * @param tabledir directory for the table under which the store currently lives + * @param family name of the family in the store + * @return {@link Path} to the directory to archive the given store or null if it should + * not be archived + */ + public static Path getStoreArchivePath(Configuration conf, HRegionInfo region, Path tabledir, + byte[] family) { + Path tableArchiveDir = getTableArchivePath(conf, tabledir); + return Store.getStoreHomedir(tableArchiveDir, + HRegionInfo.encodeRegionName(region.getRegionName()), family); + } + + /** + * Get the archive directory for a given region under the specified table + * @param conf {@link Configuration} to read the archive directory from. Can be null + * @param tabledir the original table directory. Cannot be null. + * @param regiondir the path to the region directory. Cannot be null. 
+ * @return {@link Path} to the directory to archive the given region, or null if it + * should not be archived + */ + public static Path getRegionArchiveDir(Configuration conf, Path tabledir, Path regiondir) { + // get the archive directory for a table + Path archiveDir = getTableArchivePath(conf, tabledir); + + // then add on the region path under the archive + String encodedRegionName = regiondir.getName(); + return HRegion.getRegionDir(archiveDir, encodedRegionName); + } + + /** + * Get the path to the table archive directory based on the configured archive directory. + *

+ * Assumed that the table should already be archived. + * @param conf {@link Configuration} to read the archive directory property. Can be null + * @param tabledir directory of the table to be archived. Cannot be null. + * @return {@link Path} to the archive directory for the table + */ + public static Path getTableArchivePath(Configuration conf, Path tabledir) { + String archiveName = getConfiguredArchiveDirName(conf); + Path root = tabledir.getParent(); + // now build the archive directory path + // first the top-level archive directory + // generally "/hbase/.archive/[table]" + return archiveName.length() == 0 ? new Path(root, tabledir) : new Path(new Path(root, + archiveName), tabledir.getName()); + } + + /** + * Get the archive directory as per the configuration + * @param conf {@link Configuration} to read the archive directory from (can be null, in which + * case you get the default value) + * @return the configured archive directory or the default specified by + * {@value HFileArchiveUtil#DEFAULT_HFILE_ARCHIVE_DIRECTORY} + */ + public static String getConfiguredArchiveDirName(Configuration conf) { + return conf == null ? HFileArchiveUtil.DEFAULT_HFILE_ARCHIVE_DIRECTORY : conf.get( + HConstants.HFILE_ARCHIVE_DIRECTORY, HFileArchiveUtil.DEFAULT_HFILE_ARCHIVE_DIRECTORY); + } + + /** + * Get the full path to the archive directory on the configured {@link FileSystem} + * @param conf to look for archive directory name and root directory. Cannot be null. Notes for + * testing: requires a FileSystem root directory to be specified. + * @return the full {@link Path} to the archive directory, as defined by the configuration + * @throws IOException if an unexpected error occurs + */ + public static Path getArchivePath(Configuration conf) throws IOException { + return new Path(FSUtils.getRootDir(conf), getConfiguredArchiveDirName(conf)); + } +} diff --git a/src/main/resources/hbase-default.xml b/src/main/resources/hbase-default.xml index 44ee689..f73c477 100644 --- a/src/main/resources/hbase-default.xml +++ b/src/main/resources/hbase-default.xml @@ -299,7 +299,7 @@ hbase.master.logcleaner.plugins - org.apache.hadoop.hbase.master.TimeToLiveLogCleaner + org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner A comma-separated list of LogCleanerDelegate invoked by the LogsCleaner service. These WAL/HLog cleaners are called in order, so put the HLog cleaner that prunes the most HLog files in front. To @@ -869,5 +869,23 @@ In both cases, the aggregated metric M across tables and cfs will be reported. - + + hbase.table.archive.directory + .archive + Per-table directory name under which to back up files for a + table. Files are moved to the same directories as they would be under the + table directory, but instead are just one level lower (under + table/.archive/... rather than table/...). Currently only applies to HFiles. + + + hbase.master.hfilecleaner.plugins + org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner + A comma-separated list of HFileCleanerDelegate invoked by + the HFileCleaner service. These HFile cleaners are called in order, + so put the cleaner that prunes the most files in front. To + implement your own HFileCleanerDelegate, just put it in HBase's classpath + and add the fully qualified class name here. Always add the above + default hfile cleaners in the list as they will be overwritten in hbase-site.xml.
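As a rough illustration of how the hbase.master.hfilecleaner.plugins chain described above can be extended, here is a minimal sketch of a custom delegate. It is not part of this patch: it assumes BaseHFileCleanerDelegate supplies the Configurable/Stoppable plumbing (as TimeToLiveHFileCleaner's implementation suggests), and the class name, size threshold, and configuration key are invented purely for illustration.

package org.apache.hadoop.hbase.master.cleaner;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Hypothetical example delegate: keeps archived hfiles smaller than a configured size,
 * and lets the rest of the cleaner chain decide about everything else.
 */
public class SizeThresholdHFileCleaner extends BaseHFileCleanerDelegate {
  private static final Log LOG = LogFactory.getLog(SizeThresholdHFileCleaner.class);
  /** invented key for this sketch; not defined by HBase */
  public static final String MIN_SIZE_KEY = "example.hfilecleaner.min.size";
  private long minSize;

  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);
    // the 1KB default is arbitrary, purely for the example
    this.minSize = conf.getLong(MIN_SIZE_KEY, 1024);
  }

  @Override
  public boolean isFileDeleteable(Path file) {
    try {
      FileSystem fs = FileSystem.get(getConf());
      // small files are kept; anything at or above the threshold may be deleted by the chain
      return fs.getFileStatus(file).getLen() >= minSize;
    } catch (IOException e) {
      LOG.warn("Couldn't check size of " + file + ", keeping it for now", e);
      return false;
    }
  }
}

Such a class would be registered by appending its fully qualified name to the hbase.master.hfilecleaner.plugins value above, after the default TimeToLiveHFileCleaner.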
+ + diff --git a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index e15eb47..8877653 100644 --- a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -940,7 +940,11 @@ public class HBaseTestingUtility { * @param tableName existing table */ public void deleteTable(byte[] tableName) throws IOException { - getHBaseAdmin().disableTable(tableName); + try { + getHBaseAdmin().disableTable(tableName); + } catch (TableNotEnabledException e) { + LOG.debug("Table: " + Bytes.toString(tableName) + " already disabled, so just deleting it."); + } getHBaseAdmin().deleteTable(tableName); } diff --git a/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java b/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java new file mode 100644 index 0000000..39dc066 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java @@ -0,0 +1,293 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.backup; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveTestingUtil; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test that the {@link HFileArchiver} correctly removes all the parts of a region when cleaning up + * a region + */ +@Category(MediumTests.class) +public class TestHFileArchiving { + + private static final String STRING_TABLE_NAME = "test_table"; + + private static final Log LOG = LogFactory.getLog(TestHFileArchiving.class); + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static final byte[] TABLE_NAME = Bytes.toBytes(STRING_TABLE_NAME); + private static final byte[] TEST_FAM = Bytes.toBytes("fam"); + + /** + * Setup the config for the cluster + */ + @BeforeClass + public static void setupCluster() throws Exception { + setupConf(UTIL.getConfiguration()); + UTIL.startMiniCluster(); + } + + private static void setupConf(Configuration conf) { + // disable the ui + conf.setInt("hbase.regionsever.info.port", -1); + // drop the memstore size so we get flushes + conf.setInt("hbase.hregion.memstore.flush.size", 25000); + // disable major compactions + conf.setInt(HConstants.MAJOR_COMPACTION_PERIOD, 0); + } + + @Before + public void setup() throws Exception { + UTIL.createTable(TABLE_NAME, TEST_FAM); + } + + @After + public void tearDown() throws Exception { + // cleanup the cluster if its up still + if (UTIL.getHBaseAdmin().tableExists(STRING_TABLE_NAME)) { + UTIL.deleteTable(TABLE_NAME); + } + // and cleanup the archive directory + try { + clearArchiveDirectory(); + } catch (IOException e) { + Assert.fail("Failure to delete archive directory:" + e.getMessage()); + } + } + + @AfterClass + public static void cleanupTest() throws Exception { + try { + UTIL.shutdownMiniCluster(); + } catch (Exception e) { + // NOOP; + } + } + + @Test + public void testRemovesRegionDirOnArchive() throws Exception { + final HBaseAdmin admin = UTIL.getHBaseAdmin(); + + // get the current store files for the region + List servingRegions = UTIL.getHBaseCluster().getRegions(TABLE_NAME); + // make sure we only have 1 region serving this table + assertEquals(1, servingRegions.size()); + HRegion region = servingRegions.get(0); + + // and load the table + UTIL.loadRegion(region, TEST_FAM); + + // shutdown the table so we can manipulate the files + admin.disableTable(STRING_TABLE_NAME); 
+ + FileSystem fs = UTIL.getTestFileSystem(); + + // now attempt to depose the region + Path regionDir = HRegion.getRegionDir(region.getTableDir().getParent(), region.getRegionInfo()); + + HFileArchiver.archiveRegion(fs, region.getRegionInfo()); + + // check for the existence of the archive directory and some files in it + Path archiveDir = HFileArchiveTestingUtil.getRegionArchiveDir(UTIL.getConfiguration(), region); + assertTrue(fs.exists(archiveDir)); + + // check to make sure the store directory was copied + FileStatus[] stores = fs.listStatus(archiveDir); + assertTrue(stores.length == 1); + + // make sure we archived the store files + FileStatus[] storeFiles = fs.listStatus(stores[0].getPath()); + assertTrue(storeFiles.length > 0); + + // then ensure the region's directory isn't present + assertFalse(fs.exists(regionDir)); + } + + /** + * Test that the region directory is removed when we archive a region without store files, but + * still has hidden files. + * @throws Exception + */ + @Test + public void testDeleteRegionWithNoStoreFiles() throws Exception { + // get the current store files for the region + List servingRegions = UTIL.getHBaseCluster().getRegions(TABLE_NAME); + // make sure we only have 1 region serving this table + assertEquals(1, servingRegions.size()); + HRegion region = servingRegions.get(0); + + FileSystem fs = region.getFilesystem(); + + // make sure there are some files in the regiondir + Path rootDir = FSUtils.getRootDir(fs.getConf()); + Path regionDir = HRegion.getRegionDir(rootDir, region.getRegionInfo()); + FileStatus[] regionFiles = FSUtils.listStatus(fs, regionDir, null); + Assert.assertNotNull("No files in the region directory", regionFiles); + if (LOG.isDebugEnabled()) { + List files = new ArrayList(); + for (FileStatus file : regionFiles) { + files.add(file.getPath()); + } + LOG.debug("Current files:" + files); + } + // delete the visible folders so we just have hidden files/folders + final PathFilter dirFilter = new FSUtils.DirFilter(fs); + PathFilter nonHidden = new PathFilter() { + @Override + public boolean accept(Path file) { + return dirFilter.accept(file) && !file.getName().toString().startsWith("."); + } + }; + FileStatus[] storeDirs = FSUtils.listStatus(fs, regionDir, nonHidden); + for (FileStatus store : storeDirs) { + LOG.debug("Deleting store for test"); + fs.delete(store.getPath(), true); + } + + // then archive the region + HFileArchiver.archiveRegion(fs, region.getRegionInfo()); + + // and check to make sure the region directoy got deleted + assertFalse("Region directory (" + regionDir + "), still exists.", fs.exists(regionDir)); + } + + @Test + public void testArchiveOnTableDelete() throws Exception { + List servingRegions = UTIL.getHBaseCluster().getRegions(TABLE_NAME); + // make sure we only have 1 region serving this table + assertEquals(1, servingRegions.size()); + HRegion region = servingRegions.get(0); + + // get the parent RS and monitor + HRegionServer hrs = UTIL.getRSForFirstRegionInTable(TABLE_NAME); + FileSystem fs = hrs.getFileSystem(); + + // put some data on the region + LOG.debug("-------Loading table"); + UTIL.loadRegion(region, TEST_FAM); + + // get the hfiles in the region + List regions = hrs.getOnlineRegions(TABLE_NAME); + assertEquals("More that 1 region for test table.", 1, regions.size()); + + region = regions.get(0); + // wait for all the compactions to complete + region.waitForFlushesAndCompactions(); + + // disable table to prevent new updates + UTIL.getHBaseAdmin().disableTable(TABLE_NAME); + LOG.debug("Disabled 
table"); + + // remove all the files from the archive to get a fair comparison + clearArchiveDirectory(); + + // then get the current store files + Path regionDir = region.getRegionDir(); + List storeFiles = getAllFileNames(fs, regionDir); + // remove all the non-storefile named files for the region + for (int i = 0; i < storeFiles.size(); i++) { + String file = storeFiles.get(i); + if (file.contains(HRegion.REGIONINFO_FILE) || file.contains("hlog")) { + storeFiles.remove(i--); + } + } + storeFiles.remove(HRegion.REGIONINFO_FILE); + + // then delete the table so the hfiles get archived + UTIL.deleteTable(TABLE_NAME); + + // then get the files in the archive directory. + Path archiveDir = HFileArchiveUtil.getArchivePath(UTIL.getConfiguration()); + List archivedFiles = getAllFileNames(fs, archiveDir); + Collections.sort(storeFiles); + Collections.sort(archivedFiles); + + LOG.debug("Store files:"); + for (int i = 0; i < storeFiles.size(); i++) { + LOG.debug(i + " - " + storeFiles.get(i)); + } + LOG.debug("Archive files:"); + for (int i = 0; i < archivedFiles.size(); i++) { + LOG.debug(i + " - " + archivedFiles.get(i)); + } + + assertTrue("Archived files are missing some of the store files!", + archivedFiles.containsAll(storeFiles)); + } + + private void clearArchiveDirectory() throws IOException { + UTIL.getTestFileSystem().delete(new Path(UTIL.getDefaultRootDirPath(), ".archive"), true); + } + + /** + * Get the names of all the files below the given directory + * @param fs + * @param archiveDir + * @return + * @throws IOException + */ + private List getAllFileNames(final FileSystem fs, Path archiveDir) throws IOException { + FileStatus[] files = FSUtils.listStatus(fs, archiveDir, null); + return recurseOnFiles(fs, files, new ArrayList()); + } + + /** Recursively lookup all the file names under the file[] array **/ + private List recurseOnFiles(FileSystem fs, FileStatus[] files, List fileNames) + throws IOException { + if (files == null || files.length == 0) return fileNames; + + for (FileStatus file : files) { + if (file.isDir()) { + recurseOnFiles(fs, FSUtils.listStatus(fs, file.getPath(), null), fileNames); + } else fileNames.add(file.getPath().getName()); + } + return fileNames; + } +} \ No newline at end of file diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java b/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java index 47bc119..2c6729b 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java @@ -19,11 +19,12 @@ */ package org.apache.hadoop.hbase.master; +import static org.apache.hadoop.hbase.util.HFileArchiveTestingUtil.assertArchiveEqualToOriginal; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.spy; @@ -36,6 +37,8 @@ import java.util.SortedMap; import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -60,9 +63,12 @@ import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator; import 
org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; import org.mockito.Mockito; @@ -544,6 +550,161 @@ public class TestCatalogJanitor { janitor.join(); } + @Test + public void testArchiveOldRegion() throws Exception { + String table = "table"; + HBaseTestingUtility htu = new HBaseTestingUtility(); + setRootDirAndCleanIt(htu, "testCleanParent"); + Server server = new MockServer(htu); + MasterServices services = new MockMasterServices(server); + + // create the janitor + CatalogJanitor janitor = new CatalogJanitor(server, services); + + // Create regions. + HTableDescriptor htd = new HTableDescriptor(table); + htd.addFamily(new HColumnDescriptor("f")); + HRegionInfo parent = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"), Bytes.toBytes("eee")); + HRegionInfo splita = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"), Bytes.toBytes("ccc")); + HRegionInfo splitb = new HRegionInfo(htd.getName(), Bytes.toBytes("ccc"), Bytes.toBytes("eee")); + // Test that when both daughter regions are in place, that we do not + // remove the parent. + List kvs = new ArrayList(); + kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY, + HConstants.SPLITA_QUALIFIER, Writables.getBytes(splita))); + kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY, + HConstants.SPLITB_QUALIFIER, Writables.getBytes(splitb))); + Result r = new Result(kvs); + + FileSystem fs = FileSystem.get(htu.getConfiguration()); + Path rootdir = services.getMasterFileSystem().getRootDir(); + // have to set the root directory since we use it in HFileDisposer to figure out to get to the + // archive directory. Otherwise, it just seems to pick the first root directory it can find (so + // the single test passes, but when the full suite is run, things get borked). 
+ FSUtils.setRootDir(fs.getConf(), rootdir); + Path tabledir = HTableDescriptor.getTableDir(rootdir, htd.getName()); + Path storedir = Store.getStoreHomedir(tabledir, parent.getEncodedName(), + htd.getColumnFamilies()[0].getName()); + + // delete the file and ensure that the files have been archived + Path storeArchive = HFileArchiveUtil.getStoreArchivePath(services.getConfiguration(), parent, + tabledir, htd.getColumnFamilies()[0].getName()); + + // enable archiving, make sure that files get archived + addMockStoreFiles(2, services, storedir); + // get the current store files for comparison + FileStatus[] storeFiles = fs.listStatus(storedir); + for (FileStatus file : storeFiles) { + System.out.println("Have store file:" + file.getPath()); + } + + // do the cleaning of the parent + assertTrue(janitor.cleanParent(parent, r)); + + // and now check to make sure that the files have actually been archived + FileStatus[] archivedStoreFiles = fs.listStatus(storeArchive); + assertArchiveEqualToOriginal(storeFiles, archivedStoreFiles, fs); + + // cleanup + services.stop("Test finished"); + server.stop("shutdown"); + janitor.join(); + } + + /** + * Test that if a store file with the same name is present as those already backed up cause the + * already archived files to be timestamped backup + */ + @Test + public void testDuplicateHFileResolution() throws Exception { + String table = "table"; + HBaseTestingUtility htu = new HBaseTestingUtility(); + setRootDirAndCleanIt(htu, "testCleanParent"); + Server server = new MockServer(htu); + MasterServices services = new MockMasterServices(server); + + // create the janitor + CatalogJanitor janitor = new CatalogJanitor(server, services); + + // Create regions. + HTableDescriptor htd = new HTableDescriptor(table); + htd.addFamily(new HColumnDescriptor("f")); + HRegionInfo parent = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"), Bytes.toBytes("eee")); + HRegionInfo splita = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"), Bytes.toBytes("ccc")); + HRegionInfo splitb = new HRegionInfo(htd.getName(), Bytes.toBytes("ccc"), Bytes.toBytes("eee")); + // Test that when both daughter regions are in place, that we do not + // remove the parent. + List kvs = new ArrayList(); + kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY, + HConstants.SPLITA_QUALIFIER, Writables.getBytes(splita))); + kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY, + HConstants.SPLITB_QUALIFIER, Writables.getBytes(splitb))); + Result r = new Result(kvs); + + FileSystem fs = FileSystem.get(htu.getConfiguration()); + + Path rootdir = services.getMasterFileSystem().getRootDir(); + // have to set the root directory since we use it in HFileDisposer to figure out to get to the + // archive directory. Otherwise, it just seems to pick the first root directory it can find (so + // the single test passes, but when the full suite is run, things get borked). 
+ FSUtils.setRootDir(fs.getConf(), rootdir); + Path tabledir = HTableDescriptor.getTableDir(rootdir, parent.getTableName()); + Path storedir = Store.getStoreHomedir(tabledir, parent.getEncodedName(), + htd.getColumnFamilies()[0].getName()); + System.out.println("Old root:" + rootdir); + System.out.println("Old table:" + tabledir); + System.out.println("Old store:" + storedir); + + Path storeArchive = HFileArchiveUtil.getStoreArchivePath(services.getConfiguration(), parent, + tabledir, htd.getColumnFamilies()[0].getName()); + System.out.println("Old archive:" + storeArchive); + + // enable archiving, make sure that files get archived + addMockStoreFiles(2, services, storedir); + // get the current store files for comparison + FileStatus[] storeFiles = fs.listStatus(storedir); + + // do the cleaning of the parent + assertTrue(janitor.cleanParent(parent, r)); + + // and now check to make sure that the files have actually been archived + FileStatus[] archivedStoreFiles = fs.listStatus(storeArchive); + assertArchiveEqualToOriginal(storeFiles, archivedStoreFiles, fs); + + // now add store files with the same names as before to check backup + // enable archiving, make sure that files get archived + addMockStoreFiles(2, services, storedir); + + // do the cleaning of the parent + assertTrue(janitor.cleanParent(parent, r)); + + // and now check to make sure that the files have actually been archived + archivedStoreFiles = fs.listStatus(storeArchive); + assertArchiveEqualToOriginal(storeFiles, archivedStoreFiles, fs, true); + + // cleanup + services.stop("Test finished"); + server.stop("shutdown"); + janitor.join(); + } + + private void addMockStoreFiles(int count, MasterServices services, Path storedir) + throws IOException { + // get the existing store files + FileSystem fs = services.getMasterFileSystem().getFileSystem(); + fs.mkdirs(storedir); + // create the store files in the parent + for (int i = 0; i < count; i++) { + Path storeFile = new Path(storedir, "_store" + i); + FSDataOutputStream dos = fs.create(storeFile, true); + dos.writeBytes("Some data: " + i); + dos.close(); + } + // make sure the mock store files are there + FileStatus[] storeFiles = fs.listStatus(storedir); + assertEquals(count, storeFiles.length); + } + private Result makeResultFromHRegionInfo(HRegionInfo region, HRegionInfo splita, HRegionInfo splitb) throws IOException { List kvs = new ArrayList(); diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java b/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java deleted file mode 100644 index ffda68d..0000000 --- a/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Copyright 2009 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master; - -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.net.URLEncoder; -import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.*; -import org.apache.hadoop.hbase.catalog.CatalogTracker; -import org.apache.hadoop.hbase.replication.ReplicationZookeeper; -import org.apache.hadoop.hbase.replication.regionserver.Replication; -import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -@Category(MediumTests.class) -public class TestLogsCleaner { - - private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); - - /** - * @throws java.lang.Exception - */ - @BeforeClass - public static void setUpBeforeClass() throws Exception { - TEST_UTIL.startMiniZKCluster(); - } - - /** - * @throws java.lang.Exception - */ - @AfterClass - public static void tearDownAfterClass() throws Exception { - TEST_UTIL.shutdownMiniZKCluster(); - } - - @Test - public void testLogCleaning() throws Exception{ - Configuration conf = TEST_UTIL.getConfiguration(); - // set TTL - long ttl = 2000; - conf.setLong("hbase.master.logcleaner.ttl", ttl); - conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, true); - Replication.decorateMasterConfiguration(conf); - Server server = new DummyServer(); - ReplicationZookeeper zkHelper = - new ReplicationZookeeper(server, new AtomicBoolean(true)); - - Path oldLogDir = new Path(TEST_UTIL.getDataTestDir(), - HConstants.HREGION_OLDLOGDIR_NAME); - String fakeMachineName = - URLEncoder.encode(server.getServerName().toString(), "UTF8"); - - FileSystem fs = FileSystem.get(conf); - LogCleaner cleaner = new LogCleaner(1000, server, conf, fs, oldLogDir); - - // Create 2 invalid files, 1 "recent" file, 1 very new file and 30 old files - long now = System.currentTimeMillis(); - fs.delete(oldLogDir, true); - fs.mkdirs(oldLogDir); - // Case 1: 2 invalid files, which would be deleted directly - fs.createNewFile(new Path(oldLogDir, "a")); - fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + "a")); - // Case 2: 1 "recent" file, not even deletable for the first log cleaner - // (TimeToLiveLogCleaner), so we are not going down the chain - System.out.println("Now is: " + now); - for (int i = 1; i < 31; i++) { - // Case 3: old files which would be deletable for the first log cleaner - // (TimeToLiveLogCleaner), and also for the second (ReplicationLogCleaner) - Path fileName = new Path(oldLogDir, fakeMachineName + "." + (now - i) ); - fs.createNewFile(fileName); - // Case 4: put 3 old log files in ZK indicating that they are scheduled - // for replication so these files would pass the first log cleaner - // (TimeToLiveLogCleaner) but would be rejected by the second - // (ReplicationLogCleaner) - if (i % (30/3) == 1) { - zkHelper.addLogToList(fileName.getName(), fakeMachineName); - System.out.println("Replication log file: " + fileName); - } - } - - // sleep for sometime to get newer modifcation time - Thread.sleep(ttl); - fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." 
+ now)); - - // Case 2: 1 newer file, not even deletable for the first log cleaner - // (TimeToLiveLogCleaner), so we are not going down the chain - fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now + 10000) )); - - for (FileStatus stat : fs.listStatus(oldLogDir)) { - System.out.println(stat.getPath().toString()); - } - - assertEquals(34, fs.listStatus(oldLogDir).length); - - cleaner.chore(); - - // We end up with the current log file, a newer one and the 3 old log - // files which are scheduled for replication - assertEquals(5, fs.listStatus(oldLogDir).length); - - for (FileStatus file : fs.listStatus(oldLogDir)) { - System.out.println("Kept log files: " + file.getPath().getName()); - } - } - - static class DummyServer implements Server { - - @Override - public Configuration getConfiguration() { - return TEST_UTIL.getConfiguration(); - } - - @Override - public ZooKeeperWatcher getZooKeeper() { - try { - return new ZooKeeperWatcher(getConfiguration(), "dummy server", this); - } catch (IOException e) { - e.printStackTrace(); - } - return null; - } - - @Override - public CatalogTracker getCatalogTracker() { - return null; - } - - @Override - public ServerName getServerName() { - return new ServerName("regionserver,60020,000000"); - } - - @Override - public void abort(String why, Throwable e) {} - - @Override - public boolean isAborted() { - return false; - } - - @Override - public void stop(String why) {} - - @Override - public boolean isStopped() { - return false; - } - } - - @org.junit.Rule - public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = - new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); -} - diff --git a/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileCleaner.java b/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileCleaner.java new file mode 100644 index 0000000..4286d61 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileCleaner.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.cleaner; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestHFileCleaner { + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @Test + public void testHFileCleaning() throws Exception{ + String prefix = "someHFileThatWouldBeAUUID"; + Configuration conf = TEST_UTIL.getConfiguration(); + // set TTL + long ttl = 2000; + conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, ttl); + Server server = new DummyServer(); + Path archivedHfileDir = new Path(TEST_UTIL.getDataTestDir(), + HFileArchiveUtil.getConfiguredArchiveDirName(conf)); + FileSystem fs = FileSystem.get(conf); + HFileCleaner cleaner = new HFileCleaner(1000, server, conf, fs, archivedHfileDir); + + // Create 2 invalid files, 1 "recent" file, 1 very new file and 30 old files + long now = System.currentTimeMillis(); + fs.delete(archivedHfileDir, true); + fs.mkdirs(archivedHfileDir); + // Case 1: 1 invalid file, which would be deleted directly + fs.createNewFile(new Path(archivedHfileDir, "dfd-dfd")); + // Case 2: 1 "recent" file, not even deletable for the first log cleaner + // (TimeToLiveLogCleaner), so we are not going down the chain + System.out.println("Now is: " + now); + for (int i = 1; i < 32; i++) { + // Case 3: old files which would be deletable for the first log cleaner + // (TimeToLiveHFileCleaner), + Path fileName = new Path(archivedHfileDir, (prefix + "." + (now - i))); + fs.createNewFile(fileName); + } + + // sleep for sometime to get newer modifcation time + Thread.sleep(ttl); + + // Case 2: 1 newer file, not even deletable for the first log cleaner + // (TimeToLiveLogCleaner), so we are not going down the chain + fs.createNewFile(new Path(archivedHfileDir, prefix + "." 
+ (now + 10000))); + + for (FileStatus stat : fs.listStatus(archivedHfileDir)) { + System.out.println(stat.getPath().toString()); + } + + assertEquals(33, fs.listStatus(archivedHfileDir).length); + + cleaner.chore(); + + // We end up with a small number - just the one newer file + assertEquals(1, fs.listStatus(archivedHfileDir).length); + + for (FileStatus file : fs.listStatus(archivedHfileDir)) { + System.out.println("Kept hfiles: " + file.getPath().getName()); + } + + cleaner.interrupt(); + } + + static class DummyServer implements Server { + + @Override + public Configuration getConfiguration() { + return TEST_UTIL.getConfiguration(); + } + + @Override + public ZooKeeperWatcher getZooKeeper() { + try { + return new ZooKeeperWatcher(getConfiguration(), "dummy server", this); + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } + + @Override + public CatalogTracker getCatalogTracker() { + return null; + } + + @Override + public ServerName getServerName() { + return new ServerName("regionserver,60020,000000"); + } + + @Override + public void abort(String why, Throwable e) {} + + @Override + public boolean isAborted() { + return false; + } + + @Override + public void stop(String why) {} + + @Override + public boolean isStopped() { + return false; + } + } + + @org.junit.Rule + public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = + new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); +} + diff --git a/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestLogsCleaner.java b/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestLogsCleaner.java new file mode 100644 index 0000000..e38309c --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestLogsCleaner.java @@ -0,0 +1,182 @@ +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hbase.master.cleaner; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.net.URLEncoder; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.master.cleaner.LogCleaner; +import org.apache.hadoop.hbase.replication.ReplicationZookeeper; +import org.apache.hadoop.hbase.replication.regionserver.Replication; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(MediumTests.class) +public class TestLogsCleaner { + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + /** + * @throws java.lang.Exception + */ + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.startMiniZKCluster(); + } + + /** + * @throws java.lang.Exception + */ + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniZKCluster(); + } + + @Test + public void testLogCleaning() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + // set TTL + long ttl = 2000; + conf.setLong("hbase.master.logcleaner.ttl", ttl); + conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, true); + Replication.decorateMasterConfiguration(conf); + Server server = new DummyServer(); + ReplicationZookeeper zkHelper = + new ReplicationZookeeper(server, new AtomicBoolean(true)); + + Path oldLogDir = new Path(TEST_UTIL.getDataTestDir(), + HConstants.HREGION_OLDLOGDIR_NAME); + String fakeMachineName = + URLEncoder.encode(server.getServerName().toString(), "UTF8"); + + FileSystem fs = FileSystem.get(conf); + LogCleaner cleaner = new LogCleaner(1000, server, conf, fs, oldLogDir); + + // Create 2 invalid files, 1 "recent" file, 1 very new file and 30 old files + long now = System.currentTimeMillis(); + fs.delete(oldLogDir, true); + fs.mkdirs(oldLogDir); + // Case 1: 2 invalid files, which would be deleted directly + fs.createNewFile(new Path(oldLogDir, "a")); + fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + "a")); + // Case 2: 1 "recent" file, not even deletable for the first log cleaner + // (TimeToLiveLogCleaner), so we are not going down the chain + System.out.println("Now is: " + now); + for (int i = 1; i < 31; i++) { + // Case 3: old files which would be deletable for the first log cleaner + // (TimeToLiveLogCleaner), and also for the second (ReplicationLogCleaner) + Path fileName = new Path(oldLogDir, fakeMachineName + "." + (now - i) ); + fs.createNewFile(fileName); + // Case 4: put 3 old log files in ZK indicating that they are scheduled + // for replication so these files would pass the first log cleaner + // (TimeToLiveLogCleaner) but would be rejected by the second + // (ReplicationLogCleaner) + if (i % (30/3) == 1) { + zkHelper.addLogToList(fileName.getName(), fakeMachineName); + System.out.println("Replication log file: " + fileName); + } + } + + // sleep for some time to get a newer modification time + Thread.sleep(ttl); + fs.createNewFile(new Path(oldLogDir, fakeMachineName + "."
+ now)); + + // Case 2: 1 newer file, not even deletable for the first log cleaner + // (TimeToLiveLogCleaner), so we are not going down the chain + fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now + 10000) )); + + for (FileStatus stat : fs.listStatus(oldLogDir)) { + System.out.println(stat.getPath().toString()); + } + + assertEquals(34, fs.listStatus(oldLogDir).length); + + cleaner.chore(); + + // We end up with the current log file, a newer one and the 3 old log + // files which are scheduled for replication + assertEquals(5, fs.listStatus(oldLogDir).length); + + for (FileStatus file : fs.listStatus(oldLogDir)) { + System.out.println("Kept log files: " + file.getPath().getName()); + } + } + + static class DummyServer implements Server { + + @Override + public Configuration getConfiguration() { + return TEST_UTIL.getConfiguration(); + } + + @Override + public ZooKeeperWatcher getZooKeeper() { + try { + return new ZooKeeperWatcher(getConfiguration(), "dummy server", this); + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } + + @Override + public CatalogTracker getCatalogTracker() { + return null; + } + + @Override + public ServerName getServerName() { + return new ServerName("regionserver,60020,000000"); + } + + @Override + public void abort(String why, Throwable e) {} + + @Override + public boolean isAborted() { + return false; + } + + @Override + public void stop(String why) {} + + @Override + public boolean isStopped() { + return false; + } + } + + @org.junit.Rule + public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = + new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); +} + diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/CheckedArchivingHFileCleaner.java b/src/test/java/org/apache/hadoop/hbase/regionserver/CheckedArchivingHFileCleaner.java new file mode 100644 index 0000000..3136e0d --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/CheckedArchivingHFileCleaner.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate; + +/** + * HFile archive cleaner that just tells you if it has been run already or not (and allows resets) - + * always attempts to delete the passed file. + *

+ * Just a helper class for testing to make sure the cleaner has been run. + */ +public class CheckedArchivingHFileCleaner extends BaseHFileCleanerDelegate { + + private static boolean checked; + + @Override + public boolean isFileDeleteable(Path file) { + checked = true; + return true; + } + + public static boolean getChecked() { + return checked; + } + + public static void resetCheck() { + checked = false; + } +} diff --git a/src/test/java/org/apache/hadoop/hbase/util/HFileArchiveTestingUtil.java b/src/test/java/org/apache/hadoop/hbase/util/HFileArchiveTestingUtil.java new file mode 100644 index 0000000..7609e0e --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/util/HFileArchiveTestingUtil.java @@ -0,0 +1,239 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.Store; + +/** + * Test helper for testing archiving of HFiles + */ +public class HFileArchiveTestingUtil { + + private static final Log LOG = LogFactory.getLog(HFileArchiveTestingUtil.class); + + private HFileArchiveTestingUtil() { + // NOOP private ctor since this is just a utility class + } + + public static boolean compareArchiveToOriginal(FileStatus[] previous, FileStatus[] archived, + FileSystem fs, boolean hasTimedBackup) { + + List<List<String>> lists = getFileLists(previous, archived); + List<String> original = lists.get(0); + Collections.sort(original); + + List<String> currentFiles = lists.get(1); + Collections.sort(currentFiles); + + List<String> backedup = lists.get(2); + Collections.sort(backedup); + + // check the backed up files versus the current (should match up, less the + // backup time in the name) + if (!hasTimedBackup == (backedup.size() > 0)) { + LOG.debug("backedup files doesn't match expected."); + return false; + } + String msg = null; + if (hasTimedBackup) { + msg = assertArchiveEquality(original, backedup); + if (msg != null) { + LOG.debug(msg); + return false; + } + } + msg = assertArchiveEquality(original, currentFiles); + if (msg != null) { + LOG.debug(msg); + return false; + } + return true; + } + + /** + * Compare the archived files to the files in the original directory + * @param previous original files that
should have been archived + * @param archived files that were archived + * @param fs filesystem on which the archiving took place + * @throws IOException + */ + public static void assertArchiveEqualToOriginal(FileStatus[] previous, FileStatus[] archived, + FileSystem fs) throws IOException { + assertArchiveEqualToOriginal(previous, archived, fs, false); + } + + /** + * Compare the archived files to the files in the original directory + * @param previous original files that should have been archived + * @param archived files that were archived + * @param fs {@link FileSystem} on which the archiving took place + * @param hasTimedBackup true if we expect to find an archive backup directory with a + * copy of the files in the archive directory (and the original files). + * @throws IOException + */ + public static void assertArchiveEqualToOriginal(FileStatus[] previous, FileStatus[] archived, + FileSystem fs, boolean hasTimedBackup) throws IOException { + + List<List<String>> lists = getFileLists(previous, archived); + List<String> original = lists.get(0); + Collections.sort(original); + + List<String> currentFiles = lists.get(1); + Collections.sort(currentFiles); + + List<String> backedup = lists.get(2); + Collections.sort(backedup); + + // check the backed up files versus the current (should match up, less the + // backup time in the name) + assertEquals("Didn't expect any backup files, but got: " + backedup, hasTimedBackup, + backedup.size() > 0); + String msg = null; + if (hasTimedBackup) { + msg = assertArchiveEquality(original, backedup); + assertNull(msg, msg); + } + + // do the rest of the comparison + msg = assertArchiveEquality(original, currentFiles); + assertNull(msg, msg); + } + + private static String assertArchiveEquality(List<String> expected, List<String> archived) { + String compare = compareFileLists(expected, archived); + if (!(expected.size() == archived.size())) return "Not the same number of current files\n" + + compare; + if (!expected.equals(archived)) return "Different backup files, but same amount\n" + compare; + return null; + } + + /** + * @return the lists of [original, current, backedup] file names, where each is sorted + */ + private static List<List<String>> getFileLists(FileStatus[] previous, FileStatus[] archived) { + List<List<String>> files = new ArrayList<List<String>>(); + + // copy over the original files + List<String> originalFileNames = convertToString(previous); + files.add(originalFileNames); + + List<String> currentFiles = new ArrayList<String>(previous.length); + List<FileStatus> backedupFiles = new ArrayList<FileStatus>(previous.length); + for (FileStatus f : archived) { + String name = f.getPath().getName(); + // if the file has been backed up + if (name.contains(".")) { + Path parent = f.getPath().getParent(); + String shortName = name.split("[.]")[0]; + Path modPath = new Path(parent, shortName); + FileStatus file = new FileStatus(f.getLen(), f.isDir(), f.getReplication(), + f.getBlockSize(), f.getModificationTime(), modPath); + backedupFiles.add(file); + } else { + // otherwise, add it to the list to compare to the original store files + currentFiles.add(name); + } + } + + files.add(currentFiles); + files.add(convertToString(backedupFiles)); + return files; + } + + private static List<String> convertToString(FileStatus[] files) { + return convertToString(Arrays.asList(files)); + } + + private static List<String> convertToString(List<FileStatus> files) { + List<String> originalFileNames = new ArrayList<String>(files.size()); + for (FileStatus f : files) { + originalFileNames.add(f.getPath().getName()); + } + return originalFileNames; + } + + /* Get a pretty representation of the differences */ + private static String compareFileLists(List<String> expected, List<String> gotten) { + StringBuilder sb
= new StringBuilder("Expected (" + expected.size() + "): \t\t Gotten (" + + gotten.size() + "):\n"); + List notFound = new ArrayList(); + for (String s : expected) { + if (gotten.contains(s)) sb.append(s + "\t\t" + s + "\n"); + else notFound.add(s); + } + sb.append("Not Found:\n"); + for (String s : notFound) { + sb.append(s + "\n"); + } + sb.append("\nExtra:\n"); + for (String s : gotten) { + if (!expected.contains(s)) sb.append(s + "\n"); + } + return sb.toString(); + } + + /** + * Helper method to get the archive directory for the specified region + * @param conf {@link Configuration} to check for the name of the archive directory + * @param region region that is being archived + * @return {@link Path} to the archive directory for the given region + */ + public static Path getRegionArchiveDir(Configuration conf, HRegion region) { + return HFileArchiveUtil.getRegionArchiveDir(conf, region.getTableDir(), region.getRegionDir()); + } + + /** + * Helper method to get the store archive directory for the specified region + * @param conf {@link Configuration} to check for the name of the archive directory + * @param region region that is being archived + * @param store store that is archiving files + * @return {@link Path} to the store archive directory for the given region + */ + public static Path getStoreArchivePath(Configuration conf, HRegion region, Store store) { + return HFileArchiveUtil.getStoreArchivePath(conf, region, store.getFamily().getName()); + } + + public static Path getStoreArchivePath(HBaseTestingUtility util, String tableName, + byte[] storeName) throws IOException { + byte[] table = Bytes.toBytes(tableName); + // get the RS and region serving our table + List servingRegions = util.getHBaseCluster().getRegions(table); + HRegion region = servingRegions.get(0); + + // check that we actually have some store files that were archived + Store store = region.getStore(storeName); + return HFileArchiveTestingUtil.getStoreArchivePath(util.getConfiguration(), region, store); + } +} diff --git a/src/test/java/org/apache/hadoop/hbase/util/TestHFileArchiveUtil.java b/src/test/java/org/apache/hadoop/hbase/util/TestHFileArchiveUtil.java new file mode 100644 index 0000000..86a6014 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/util/TestHFileArchiveUtil.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.util; + +import static org.junit.Assert.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +/** + * Test that the utility works as expected + */ +@Category(SmallTests.class) +public class TestHFileArchiveUtil { + + @Test + public void testGetConfiguredArchiveDir() { + assertEquals(HFileArchiveUtil.DEFAULT_HFILE_ARCHIVE_DIRECTORY, + HFileArchiveUtil.getConfiguredArchiveDirName(null)); + Configuration conf = new Configuration(); + assertEquals(HFileArchiveUtil.DEFAULT_HFILE_ARCHIVE_DIRECTORY, + HFileArchiveUtil.getConfiguredArchiveDirName(conf)); + conf.set(HConstants.HFILE_ARCHIVE_DIRECTORY, ""); + assertEquals("", HFileArchiveUtil.getConfiguredArchiveDirName(conf)); + String archiveDir = "somearchive"; + conf.set(HConstants.HFILE_ARCHIVE_DIRECTORY, archiveDir); + assertEquals(archiveDir, HFileArchiveUtil.getConfiguredArchiveDirName(conf)); + } + + @Test + public void testGetTableArchivePath() { + assertNotNull(HFileArchiveUtil.getTableArchivePath(null, new Path("table"))); + Configuration conf = new Configuration(); + conf.set(HConstants.HFILE_ARCHIVE_DIRECTORY, ""); + assertNotNull(HFileArchiveUtil.getTableArchivePath(conf, new Path("root", new Path("table")))); + } + + @Test + public void testGetArchivePath() throws Exception { + Configuration conf = new Configuration(); + FSUtils.setRootDir(conf, new Path("root")); + assertNotNull(HFileArchiveUtil.getArchivePath(conf)); + String archiveDir = "somearchive"; + conf.set(HConstants.HFILE_ARCHIVE_DIRECTORY, archiveDir); + assertEquals(new Path(FSUtils.getRootDir(conf), archiveDir), + HFileArchiveUtil.getArchivePath(conf)); + } + + @Test + public void testRegionArchiveDir() { + Path tableDir = new Path("table"); + Path regionDir = new Path("region"); + assertNotNull(HFileArchiveUtil.getRegionArchiveDir(null, tableDir, regionDir)); + } + + @Test + public void testGetStoreArchivePath(){ + byte[] family = Bytes.toBytes("Family"); + Path tabledir = new Path("table"); + HRegionInfo region = new HRegionInfo(Bytes.toBytes("table")); + Configuration conf = null; + assertNotNull(HFileArchiveUtil.getStoreArchivePath(conf, region, tabledir, family)); + conf = new Configuration(); + assertNotNull(HFileArchiveUtil.getStoreArchivePath(conf, region, tabledir, family)); + conf.set(HConstants.HFILE_ARCHIVE_DIRECTORY, "archiveDir"); + assertNotNull(HFileArchiveUtil.getStoreArchivePath(conf, region, tabledir, family)); + + // do a little mocking of a region to get the same results + HRegion mockRegion = Mockito.mock(HRegion.class); + Mockito.when(mockRegion.getRegionInfo()).thenReturn(region); + Mockito.when(mockRegion.getTableDir()).thenReturn(tabledir); + + assertNotNull(HFileArchiveUtil.getStoreArchivePath(null, mockRegion, family)); + conf = new Configuration(); + assertNotNull(HFileArchiveUtil.getStoreArchivePath(conf, mockRegion, family)); + conf.set(HConstants.HFILE_ARCHIVE_DIRECTORY, "archiveDir"); + assertNotNull(HFileArchiveUtil.getStoreArchivePath(conf, mockRegion, family)); + + } +}
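
Note on the delegate contract the tests above exercise: HFileCleaner (like LogCleaner) walks a chain of cleaner delegates and removes an archived hfile only when every delegate reports it as deletable, which is why TestHFileCleaner and TestLogsCleaner talk about files "not going down the chain". The sketch below is illustrative only and is not part of this patch: BaseHFileCleanerDelegate, isFileDeleteable(Path) and TimeToLiveHFileCleaner.TTL_CONF_KEY come from the code above, while the class name, the fallback TTL value, the setConf/getConf wiring and the FileSystem.get(...) lookup are assumptions made for the example.

// Illustrative sketch only (not part of this patch): a TTL-based cleaner
// delegate in the spirit of TimeToLiveHFileCleaner. Assumes the delegate
// receives its Configuration via setConf/getConf (as BaseConfigurable does);
// the 60s fallback TTL and the FileSystem.get(...) call are assumptions.
package org.apache.hadoop.hbase.master.cleaner;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExampleTtlHFileCleanerDelegate extends BaseHFileCleanerDelegate {

  private long ttl;

  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);
    // same key that TestHFileCleaner configures; the 60s default is an assumption
    this.ttl = conf.getLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, 60 * 1000);
  }

  @Override
  public boolean isFileDeleteable(Path file) {
    try {
      FileStatus status = FileSystem.get(getConf()).getFileStatus(file);
      // only allow deletion once the archived hfile is older than the TTL
      return System.currentTimeMillis() - status.getModificationTime() > ttl;
    } catch (IOException e) {
      // if the file cannot be stat-ed, err on the side of keeping it
      return false;
    }
  }
}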