commit 11aed0e86d2f0bf4f67e000645b0be411a990aa9
Author: Todd Lipcon
Date:   Fri May 27 10:19:31 2011 -0700

    hfile stats and compaction fix

diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index c3aae1c..cb52fac 100644
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.KeyValue.KVComparator;
 import org.apache.hadoop.hbase.KeyValue.KeyComparator;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -1906,6 +1907,8 @@ public class HFile {
         "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
       options.addOption("r", "region", true,
         "Region to scan. Pass region name; e.g. '.META.,,1'");
+      options.addOption("s", "stats", false, "Print statistics");
+
       if (args.length == 0) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("HFile ", options, true);
@@ -1920,6 +1923,7 @@ public class HFile {
       boolean printBlocks = cmd.hasOption("b");
       boolean checkRow = cmd.hasOption("k");
       boolean checkFamily = cmd.hasOption("a");
+      boolean printStats = cmd.hasOption("s");
       // get configuration, file system and get list of files
       Configuration conf = HBaseConfiguration.create();
       conf.set("fs.defaultFS",
@@ -1963,7 +1967,8 @@ public class HFile {
         HFile.Reader reader = new HFile.Reader(fs, file, null, false, false);
         Map fileInfo = reader.loadFileInfo();
         int count = 0;
-        if (verbose || printKey || checkRow || checkFamily) {
+        KeyValueStatsCollector fileStats = new KeyValueStatsCollector();
+        if (verbose || printKey || checkRow || checkFamily || printStats) {
           // scan over file and read key/value's and check if requested
           HFileScanner scanner = reader.getScanner(false, false);
           scanner.seekTo();
@@ -1978,9 +1983,13 @@ public class HFile {
               }
               System.out.println();
             }
+            // collect stats
+            if (printStats) {
+              fileStats.collect(kv);
+            }
             // check if rows are in order
             if (checkRow && pkv != null) {
-              if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
+              if (KeyValue.COMPARATOR.compareRows(pkv, kv) > 0) {
                 System.err.println("WARNING, previous row is greater then" +
                     " current row\n\tfilename -> " + file +
                     "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey()) +
@@ -2051,10 +2060,120 @@ public class HFile {
           System.out.println("Block Index:");
           System.out.println(reader.blockIndex);
         }
+        if (printStats) {
+          fileStats.finish();
+          System.out.println("Stats:\n" + fileStats);
+        }
         reader.close();
       }
     } catch (Exception e) {
       e.printStackTrace();
     }
   }
+
+  /**
+   * Running min / max / sum / count accumulator for a stream of long samples.
+   */
+  private static class LongStats {
+    private long min = Long.MAX_VALUE;
+    private long max = Long.MIN_VALUE;
+    private long sum = 0;
+    private long count = 0;
+
+    /** Folds one sample into the running statistics. */
+    void collect(long d) {
+      if (d < min) min = d;
+      if (d > max) max = d;
+      sum += d;
+      count++;
+    }
+
+    /** Renders count, min, max and mean; the mean is NaN when nothing was collected. */
+    @Override
+    public String toString() {
+      return "count: " + count +
+        "\tmin: " + min +
+        "\tmax: " + max +
+        "\tmean: " + ((double)sum/count);
+    }
+
+  }
+
+  /**
+   * Accumulates key-length, value-length and per-row size statistics over the
+   * KeyValues passed to {@link #collect(KeyValue)}. A row ends when the next
+   * KeyValue's row compares unequal under KeyValue.COMPARATOR.compareRows;
+   * call {@link #finish()} after the last KeyValue so the final row is counted.
+   */
+  private static class KeyValueStatsCollector {
+    LongStats keyLen = new LongStats();
+    LongStats valLen = new LongStats();
+    LongStats rowSizeBytes = new LongStats();
+    LongStats rowSizeCols = new LongStats();
+
+    // running totals for the row currently being accumulated
+    long curRowBytes = 0;
+    long curRowCols = 0;
+
+    // row key of the largest row (in bytes) recorded so far
+    byte[] biggestRow = null;
+
+    // previously collected KeyValue, used to detect row changes
+    private KeyValue pkv = null;
+    // size in bytes of the row recorded in biggestRow
+    private long maxRowBytes = 0;
+
+    /**
+     * Records one KeyValue: samples its key and value lengths and, if its row
+     * differs from the previous KeyValue's row, closes out the previous row first.
+     */
+    public void collect(KeyValue kv) {
+      keyLen.collect(kv.getKeyLength());
+      valLen.collect(kv.getValueLength());
+      if (pkv != null &&
+          KeyValue.COMPARATOR.compareRows(pkv, kv) != 0) {
+        // new row
+        collectRow();
+      }
+      curRowBytes += kv.getLength();
+      curRowCols++;
+      pkv = kv;
+    }
+
+    /**
+     * Closes out the current row: samples its byte and column totals, tracks
+     * the largest row seen so far, and resets the running totals.
+     */
+    private void collectRow() {
+      rowSizeBytes.collect(curRowBytes);
+      rowSizeCols.collect(curRowCols);
+
+      if (curRowBytes > maxRowBytes && pkv != null) {
+        biggestRow = pkv.getRow();
+        // remember the new maximum; otherwise maxRowBytes stays 0 and every
+        // later row with a positive size would overwrite biggestRow
+        maxRowBytes = curRowBytes;
+      }
+
+      curRowBytes = 0;
+      curRowCols = 0;
+    }
+
+    /** Flushes the row still being accumulated, if any. */
+    public void finish() {
+      if (curRowCols > 0) {
+        collectRow();
+      }
+    }
+
+    @Override
+    public String toString() {
+      return
+        "Key length: " + keyLen + "\n" +
+        "Val length: " + valLen + "\n" +
+        "Row size (bytes): " + rowSizeBytes + "\n" +
+        "Row size (columns): " + rowSizeCols + "\n" +
+        "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
+    }
+  }
 }