diff --git src/main/java/org/apache/hadoop/hbase/util/HFileV1Detector.java src/main/java/org/apache/hadoop/hbase/util/HFileV1Detector.java
new file mode 100644
index 0000000..34c79454
--- /dev/null
+++ src/main/java/org/apache/hadoop/hbase/util/HFileV1Detector.java
@@ -0,0 +1,412 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.FileLink;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Tool to detect presence of any HFileV1 in the given directory. It prints all such regions which
+ * have such files.
+ * <p>
+ * To print the help section of the tool:
+ * <ul>
+ * <li>./bin/hbase org.apache.hadoop.hbase.util.HFileV1Detector --h or,
+ * <li>java -cp `hbase classpath` org.apache.hadoop.hbase.util.HFileV1Detector --h
+ * </ul>
+ * It also supports -h, --help, -help options.
+ * </p>
+ */
+public class HFileV1Detector extends Configured implements Tool {
+  private FileSystem fs;
+  private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
+  private static final int DEFAULT_NUM_OF_THREADS = 10;
+  /**
+   * Pre-namespace archive directory
+   */
+  private static final String PRE_NS_DOT_ARCHIVE = ".archive";
+  /**
+   * Pre-namespace tmp directory
+   */
+  private static final String PRE_NS_DOT_TMP = ".tmp";
+  private int numOfThreads;
+  /**
+   * directory to start the processing.
+   */
+  private Path targetDirPath;
+  /**
+   * executor for processing regions.
+   */
+  private ExecutorService exec;
+
+  /**
+   * Keeps record of processed tables.
+   */
+  private final Set<Path> processedTables = new HashSet<Path>();
+  /**
+   * set of corrupted HFiles (with undetermined major version)
+   */
+  private final Set<Path> corruptedHFiles = Collections
+      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
+  /**
+   * set of HFileV1 files.
+   */
+  private final Set<Path> hFileV1Set = Collections
+      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
+
+  private Options options = new Options();
+
+  private Path defaultRootDir = null;
+
+  public HFileV1Detector() {
+    Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
+    pathOption.setRequired(false);
+    options.addOption(pathOption);
+    Option threadOption = new Option("n", "numberOfThreads", true,
+        "Number of threads to use while processing HFiles.");
+    threadOption.setRequired(false);
+    options.addOption(threadOption);
+    options.addOption("h", "help", false, "Help");
+  }
+
+  private boolean parseOption(String[] args) throws ParseException, IOException {
+    if (args.length == 0) {
+      return true; // no args will process with default values.
+    }
+    CommandLineParser parser = new GnuParser();
+    CommandLine cmd = parser.parse(options, args);
+    if (cmd.hasOption("h")) {
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp("HFileV1Detector", options, true);
+      System.out
+          .println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
+      System.out.println("Example:");
+      System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
+      System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
+      System.out.println();
+      return false;
+    }
+
+    if (cmd.hasOption("p")) {
+      this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p"));
+    }
+    try {
+      if (cmd.hasOption("n")) {
+        int n = Integer.parseInt(cmd.getOptionValue("n"));
+        if (n <= 0 || n > 100) {
+          LOG.warn("Please use a positive number <= 100 for number of threads."
+              + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
+          return true;
+        }
+        this.numOfThreads = n;
+      }
+    } catch (NumberFormatException nfe) {
+      LOG.error("Please select a valid number for threads");
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Checks for HFileV1.
+   * @return 0 when no HFileV1 is present.
+   *         1 when a HFileV1 is present or, when there is a file with corrupt major version
+   *         (neither V1 nor V2).
+   *         -1 in case of any error/exception
+   */
+  @Override
+  public int run(String args[]) throws IOException, ParseException {
+    Path root = new Path(FSUtils.getRootDir(getConf()).toUri());
+    getConf().set("fs.defaultFS", root.toString()); // for hadoop 0.21+
+    fs = FileSystem.get(getConf());
+    numOfThreads = DEFAULT_NUM_OF_THREADS;
+    targetDirPath = FSUtils.getRootDir(getConf());
+    if (!parseOption(args)) {
+      System.exit(-1);
+    }
+    this.exec = Executors.newFixedThreadPool(numOfThreads);
+    try {
+      return processResult(checkForV1Files(targetDirPath));
+    } catch (Exception e) {
+      LOG.error(e);
+    } finally {
+      exec.shutdown();
+      fs.close();
+    }
+    return -1;
+  }
+
+  private int processResult(Set<Path> regionsWithHFileV1) {
+    LOG.info("Result: \n");
+    printSet(processedTables, "Tables Processed: ");
+
+    int count = hFileV1Set.size();
+    LOG.info("Count of HFileV1: " + count);
+    if (count > 0) printSet(hFileV1Set, "HFileV1:");
+
+    count = corruptedHFiles.size();
+    LOG.info("Count of corrupted files: " + count);
+    if (count > 0) printSet(corruptedHFiles, "Corrupted Files: ");
+
+    count = regionsWithHFileV1.size();
+    LOG.info("Count of Regions with HFileV1: " + count);
+    if (count > 0) printSet(regionsWithHFileV1, "Regions to Major Compact: ");
+
+    return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1;
+  }
+
+  private void printSet(Set<Path> result, String msg) {
+    LOG.info(msg);
+    for (Path p : result) {
+      LOG.info(p);
+    }
+  }
+
+  /**
+   * Takes a directory path, and lists out any HFileV1, if present.
+   * @param targetDir directory to start looking for HFileV1.
+   * @return set of Regions that have HFileV1
+   * @throws IOException
+   */
+  private Set<Path> checkForV1Files(Path targetDir) throws IOException {
+    LOG.info("Target dir is: " + targetDir);
+    if (!fs.exists(targetDir)) {
+      throw new IOException("The given path does not exist: " + targetDir);
+    }
+    if (isTableDir(fs, targetDir)) {
+      processedTables.add(targetDir);
+      return processTable(targetDir);
+    }
+    Set<Path> regionsWithHFileV1 = new HashSet<Path>();
+    FileStatus[] fsStats = fs.listStatus(targetDir);
+    for (FileStatus fsStat : fsStats) {
+      if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) {
+        processedTables.add(fsStat.getPath());
+        // look for regions and find out any v1 file.
+        regionsWithHFileV1.addAll(processTable(fsStat.getPath()));
+      } else {
+        LOG.info("Ignoring path: " + fsStat.getPath());
+      }
+    }
+    return regionsWithHFileV1;
+  }
+
+  /**
+   * Ignore ROOT table as it doesn't exist in 0.96.
+   * @param path
+   */
+  private boolean isRootTable(Path path) {
+    if (path != null && path.toString().endsWith("-ROOT-")) return true;
+    return false;
+  }
+
+  /**
+   * Find out regions in the table which have HFileV1.
+   * @param tableDir
+   * @return the set of regions containing HFile v1.
+   * @throws IOException
+   */
+  private Set<Path> processTable(Path tableDir) throws IOException {
+    // list out the regions and then process each file in it.
+    LOG.debug("processing table: " + tableDir);
+    List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
+    Set<Path> regionsWithHFileV1 = new HashSet<Path>();
+
+    FileStatus[] fsStats = fs.listStatus(tableDir);
+    for (FileStatus fsStat : fsStats) {
+      // process each region
+      if (isRegionDir(fs, fsStat.getPath())) {
+        regionLevelResults.add(processRegion(fsStat.getPath()));
+      }
+    }
+    for (Future<Path> f : regionLevelResults) {
+      try {
+        if (f.get() != null) {
+          regionsWithHFileV1.add(f.get());
+        }
+      } catch (InterruptedException e) {
+        LOG.error(e);
+      } catch (ExecutionException e) {
+        LOG.error(e); // might be a bad hfile. We print it at the end.
+      }
+    }
+    return regionsWithHFileV1;
+  }
+
+  /**
+   * Each region is processed by a separate handler. If an HRegion has an HFileV1, its path is
+   * returned as the future's result; otherwise, null is returned.
+   * @param regionDir Region to process.
+   * @return corresponding Future object.
+   */
+  private Future<Path> processRegion(final Path regionDir) {
+    LOG.debug("processing region: " + regionDir);
+    Callable<Path> regionCallable = new Callable<Path>() {
+      @Override
+      public Path call() throws Exception {
+        for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
+          FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
+          if (storeFiles == null || storeFiles.length == 0) continue;
+          for (FileStatus storeFile : storeFiles) {
+            Path storeFilePath = storeFile.getPath();
+            FSDataInputStream fsdis = null;
+            long lenToRead = 0;
+            try {
+              // check whether this path is a reference.
+              if (StoreFile.isReference(storeFilePath)) continue;
+              // check whether this path is a HFileLink.
+              else if (HFileLink.isHFileLink(storeFilePath)) {
+                FileLink fLink = getFileLinkWithPreNSPath(storeFilePath);
+                fsdis = fLink.open(fs);
+                lenToRead = fLink.getFileStatus(fs).getLen();
+              } else {
+                // a regular hfile
+                fsdis = fs.open(storeFilePath);
+                lenToRead = storeFile.getLen();
+              }
+              int majorVersion = computeMajorVersion(fsdis, lenToRead);
+              if (majorVersion == 1) {
+                hFileV1Set.add(storeFilePath);
+                // return this region path, as it needs to be compacted.
+                return regionDir;
+              }
+              if (majorVersion > 2 || majorVersion < 1) throw new IllegalArgumentException(
+                  "Incorrect major version: " + majorVersion);
+            } catch (Exception iae) {
+              corruptedHFiles.add(storeFilePath);
+              LOG.error("Got exception while reading trailer for file: " + storeFilePath, iae);
+            } finally {
+              if (fsdis != null) fsdis.close();
+            }
+          }
+        }
+        return null;
+      }
+
+      private int computeMajorVersion(FSDataInputStream istream, long fileSize)
+          throws IOException {
+        // read the last int of the file. Major version is in the last 3 bytes.
+        long seekPoint = fileSize - Bytes.SIZEOF_INT;
+        if (seekPoint < 0)
+          throw new IllegalArgumentException("File too small, no major version found");
+
+        // Read the version from the last int of the file.
+        istream.seek(seekPoint);
+        int version = istream.readInt();
+        // Extract and return the major version
+        return version & 0x00ffffff;
+      }
+    };
+    Future<Path> f = exec.submit(regionCallable);
+    return f;
+  }
+
+  /**
+   * Creates a FileLink which adds pre-namespace paths in its list of available paths. This is used
+   * when reading a snapshot file in a pre-namespace file layout, for example, while upgrading.
+   * @param storeFilePath
+   * @return a FileLink which could read from pre-namespace paths.
+   * @throws IOException
+   */
+  public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException {
+    HFileLink link = new HFileLink(getConf(), storeFilePath);
+    List<Path> pathsToProcess = getPreNSPathsForHFileLink(link);
+    pathsToProcess.addAll(Arrays.asList(link.getLocations()));
+    return new FileLink(pathsToProcess);
+  }
+
+  private List<Path> getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException {
+    List<Path> p = new ArrayList<Path>();
+    String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath());
+    p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath));
+    p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath));
+    p.add(getPreNSPath(null, relativeTablePath));
+    return p;
+  }
+
+  /**
+   * Removes the prefix of defaultNamespace from the path.
+   * @param originalPath
+   * @throws IOException
+   */
+  private String removeDefaultNSPath(Path originalPath) throws IOException {
+    if (defaultRootDir == null) {
+      defaultRootDir = FSUtils.getRootDir(getConf());
+    }
+    String pathStr = originalPath.toString();
+    if (!pathStr.startsWith(defaultRootDir.toString())) return pathStr;
+    return pathStr.substring(defaultRootDir.toString().length() + 1);
+  }
+
+  private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException {
+    String relativePath = (prefix == null ? relativeTablePath : prefix + Path.SEPARATOR
+        + relativeTablePath);
+    return new Path(FSUtils.getRootDir(getConf()), relativePath);
+  }
+
+  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
+    // check for old format, of having /table/.tableinfo; hbase:meta doesn't have .tableinfo,
+    // include it.
+    if (fs.isFile(path)) return false;
+    return (FSTableDescriptors.getTableInfoPath(fs, path) != null)
+        || path.toString().endsWith(".META.");
+  }
+
+  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
+    if (fs.isFile(path)) return false;
+    Path regionInfo = new Path(path, ".regioninfo");
+    return fs.exists(regionInfo);
+  }
+
+  public static void main(String args[]) throws Exception {
+    System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
+  }
+
+}
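
Note for anyone trying this patch out: the tool is normally launched through ToolRunner, exactly as in main() above. Below is a minimal programmatic driver sketch; the '/myhbase' path is borrowed from the tool's own help text and the thread count is an illustrative value, not a default.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.HFileV1Detector;
import org.apache.hadoop.util.ToolRunner;

public class HFileV1DetectorDriver {
  public static void main(String[] args) throws Exception {
    // -p is resolved against hbase.rootdir; -n caps the region-processing threads.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(),
        new String[] { "-p", "/myhbase", "-n", "4" });
    // 0: no HFileV1 found; 1: HFileV1 or corrupted trailers found; -1: error.
    System.exit(exitCode);
  }
}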
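On computeMajorVersion(): an HFile trailer ends with a 4-byte version word, and as the comment in the patch says, the major version sits in the low three bytes; in V2 files the high byte carries the minor version. A self-contained sketch of that decoding, using a hand-built version word rather than a real trailer:

public class HFileVersionWordSketch {
  // Major version: low 3 bytes of the trailer's last int.
  static int majorVersion(int versionWord) {
    return versionWord & 0x00ffffff;
  }

  // Minor version (V2 only): high byte of the same int.
  static int minorVersion(int versionWord) {
    return versionWord >>> 24;
  }

  public static void main(String[] args) {
    int versionWord = (3 << 24) | 2; // hypothetical word: minor=3, major=2
    System.out.println("major=" + majorVersion(versionWord)); // major=2
    System.out.println("minor=" + minorVersion(versionWord)); // minor=3
  }
}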
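On getFileLinkWithPreNSPath(): the detector puts the pre-namespace candidate locations (.archive, .tmp, and the root-relative table path) ahead of the HFileLink's usual locations, so snapshot-referenced files laid out before the namespace switch can still be opened. A sketch of the same fallback idea; every path below is hypothetical:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.FileLink;

public class PreNSFileLinkSketch {
  public static void main(String[] args) {
    List<Path> candidates = new ArrayList<Path>();
    candidates.add(new Path("/hbase/.archive/t1/abc123/cf/hfile1")); // pre-NS archive
    candidates.add(new Path("/hbase/.tmp/t1/abc123/cf/hfile1"));     // pre-NS tmp
    candidates.add(new Path("/hbase/t1/abc123/cf/hfile1"));          // pre-NS table dir
    // A FileLink tries its locations in order when opened; the post-namespace
    // locations from HFileLink.getLocations() are appended after these.
    FileLink link = new FileLink(candidates);
    System.out.println(link);
  }
}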