diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index 7cc31d0..a479fdf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -24,9 +24,13 @@ import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import org.apache.commons.cli.CommandLine; @@ -57,12 +61,14 @@ import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; +import org.apache.hadoop.hbase.mob.MobUtils; import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.util.BloomFilter; import org.apache.hadoop.hbase.util.BloomFilterUtil; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -95,6 +101,8 @@ private boolean checkRow; private boolean checkFamily; private boolean isSeekToRow = false; + private boolean checkMob = false; + private Map> mobFileLocations; /** * The row which the user wants to specify and print all the KeyValues for. @@ -130,6 +138,7 @@ private void init() { options.addOption("w", "seekToRow", true, "Seek to this row and print all the kvs for this row only"); options.addOption("s", "stats", false, "Print statistics"); + options.addOption("i", "checkMob", false, "Print all cells whose mob files are missing"); OptionGroup files = new OptionGroup(); files.addOption(new Option("f", "file", true, @@ -158,6 +167,7 @@ public boolean parseOptions(String args[]) throws ParseException, printStats = cmd.hasOption("s"); checkRow = cmd.hasOption("k"); checkFamily = cmd.hasOption("a"); + checkMob = cmd.hasOption("i"); if (cmd.hasOption("f")) { files.add(new Path(cmd.getOptionValue("f"))); @@ -199,6 +209,12 @@ public boolean parseOptions(String args[]) throws ParseException, files.addAll(regionFiles); } + if(checkMob) { + if (verbose) { + System.out.println("checkMob is enabled"); + } + mobFileLocations = new HashMap>(); + } return true; } @@ -255,7 +271,7 @@ private void processFile(Path file) throws IOException { KeyValueStatsCollector fileStats = null; - if (verbose || printKey || checkRow || checkFamily || printStats) { + if (verbose || printKey || checkRow || checkFamily || printStats || checkMob) { // scan over file and read key/value's and check if requested HFileScanner scanner = reader.getScanner(false, false, false); fileStats = new KeyValueStatsCollector(); @@ -313,6 +329,9 @@ private void processFile(Path file) throws IOException { private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner, byte[] row) throws IOException { Cell pCell = null; + FileSystem fs = FileSystem.get(getConf()); + Set foundMobFiles = new LinkedHashSet(100); + Set missingMobFiles = new LinkedHashSet(20); do { Cell cell = scanner.getKeyValue(); if (row != null && row.length != 0) { @@ -369,12 +388,88 @@ private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, + "\n\tcurrent -> " + CellUtil.getCellKeyAsString(cell)); } } + // check if mob files are missing. + if (checkMob && MobUtils.isMobReferenceCell(cell)) { + Tag tnTag = MobUtils.getTableNameTag(cell); + if (tnTag == null) { + System.err.println("ERROR, wrong tag format in mob reference cell " + + CellUtil.getCellKeyAsString(cell)); + } else if (!MobUtils.hasValidMobRefCellValue(cell)) { + System.err.println("ERROR, wrong value format in mob reference cell " + + CellUtil.getCellKeyAsString(cell)); + } else { + TableName tn = TableName.valueOf(tnTag.getValue()); + String mobFileName = MobUtils.getMobFileName(cell); + boolean exist = mobFileExists(fs, tn, mobFileName, + Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles); + if (!exist) { + // report error + System.err.println("ERROR, the mob file [" + mobFileName + + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell)); + } + } + } pCell = cell; ++count; } while (scanner.next()); } /** + * Checks whether the referenced mob file exists. + */ + private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family, + Set foundMobFiles, Set missingMobFiles) throws IOException { + if (foundMobFiles.contains(mobFileName)) { + return true; + } + if (missingMobFiles.contains(mobFileName)) { + return false; + } + String tableName = tn.getNameAsString(); + List locations = mobFileLocations.get(tableName); + if (locations == null) { + locations = new ArrayList(2); + locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family)); + locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn, + MobUtils.getMobRegionInfo(tn).getEncodedName(), family)); + mobFileLocations.put(tn.getNameAsString(), locations); + } + boolean exist = false; + for (Path location : locations) { + Path mobFilePath = new Path(location, mobFileName); + if (fs.exists(mobFilePath)) { + exist = true; + break; + } + } + if (exist) { + evictMobFilesIfNecessary(foundMobFiles, 50); + foundMobFiles.add(mobFileName); + } else { + evictMobFilesIfNecessary(missingMobFiles, 20); + missingMobFiles.add(mobFileName); + } + return exist; + } + + /** + * Evicts the cached mob files if the set is larger than the limit. + */ + private void evictMobFilesIfNecessary(Set mobFileNames, int limit) { + if (mobFileNames.size() < limit) { + return; + } + int index = 0; + int evict = limit / 2; + Iterator fnir = mobFileNames.iterator(); + while (index < evict && fnir.hasNext()) { + fnir.next(); + fnir.remove(); + index++; + } + } + + /** * Format a string of the form "k1=v1, k2=v2, ..." into separate lines * with a four-space indentation. */