diff --git a/src/java/org/apache/hadoop/hbase/HMsg.java b/src/java/org/apache/hadoop/hbase/HMsg.java index 11c7a3d..ec14665 100644 --- a/src/java/org/apache/hadoop/hbase/HMsg.java +++ b/src/java/org/apache/hadoop/hbase/HMsg.java @@ -191,7 +191,7 @@ public class HMsg implements Writable { // If null or empty region, don't bother printing it out. if (this.info != null && this.info.getRegionName().length > 0) { sb.append(": "); - sb.append(this.info.toString()); + sb.append(this.info.getRegionNameAsString()); } if (this.message != null && this.message.length > 0) { sb.append(": " + Bytes.toString(this.message)); diff --git a/src/java/org/apache/hadoop/hbase/HRegionInfo.java b/src/java/org/apache/hadoop/hbase/HRegionInfo.java index 11bae89..5c0c4cf 100644 --- a/src/java/org/apache/hadoop/hbase/HRegionInfo.java +++ b/src/java/org/apache/hadoop/hbase/HRegionInfo.java @@ -43,7 +43,7 @@ public class HRegionInfo extends VersionedWritable implements WritableComparable * @return the encodedName */ public static int encodeRegionName(final byte [] regionName) { - return Math.abs(JenkinsHash.getInstance().hash(regionName, regionName.length, 0)); + return Math.abs(JenkinsHash.getInstance().hash(regionName, 0, regionName.length, 0)); } /** delimiter used between portions of a region name */ @@ -90,7 +90,7 @@ public class HRegionInfo extends VersionedWritable implements WritableComparable this.regionId = regionId; this.tableDesc = tableDesc; this.regionName = createRegionName(tableDesc.getName(), null, regionId); - this.regionNameStr = Bytes.toString(this.regionName); + this.regionNameStr = Bytes.toStringBinary(this.regionName); setHashCode(); } @@ -151,7 +151,7 @@ public class HRegionInfo extends VersionedWritable implements WritableComparable this.offLine = false; this.regionId = regionid; this.regionName = createRegionName(tableDesc.getName(), startKey, regionId); - this.regionNameStr = Bytes.toString(this.regionName); + this.regionNameStr = Bytes.toStringBinary(this.regionName); this.split = split; this.endKey = endKey == null? HConstants.EMPTY_END_ROW: endKey.clone(); this.startKey = startKey == null? @@ -171,7 +171,7 @@ public class HRegionInfo extends VersionedWritable implements WritableComparable this.offLine = other.isOffline(); this.regionId = other.getRegionId(); this.regionName = other.getRegionName(); - this.regionNameStr = Bytes.toString(this.regionName); + this.regionNameStr = Bytes.toStringBinary(this.regionName); this.split = other.isSplit(); this.startKey = other.getStartKey(); this.tableDesc = other.getTableDesc(); @@ -361,8 +361,8 @@ public class HRegionInfo extends VersionedWritable implements WritableComparable return "REGION => {" + HConstants.NAME + " => '" + this.regionNameStr + "', STARTKEY => '" + - Bytes.toString(this.startKey) + "', ENDKEY => '" + - Bytes.toString(this.endKey) + + Bytes.toStringBinary(this.startKey) + "', ENDKEY => '" + + Bytes.toStringBinary(this.endKey) + "', ENCODED => " + getEncodedName() + "," + (isOffline()? " OFFLINE => true,": "") + (isSplit()? 
" SPLIT => true,": "") + @@ -424,7 +424,7 @@ public class HRegionInfo extends VersionedWritable implements WritableComparable this.offLine = in.readBoolean(); this.regionId = in.readLong(); this.regionName = Bytes.readByteArray(in); - this.regionNameStr = Bytes.toString(this.regionName); + this.regionNameStr = Bytes.toStringBinary(this.regionName); this.split = in.readBoolean(); this.startKey = Bytes.readByteArray(in); this.tableDesc.readFields(in); diff --git a/src/java/org/apache/hadoop/hbase/HStoreKey.java b/src/java/org/apache/hadoop/hbase/HStoreKey.java index 88b9415..5068431 100644 --- a/src/java/org/apache/hadoop/hbase/HStoreKey.java +++ b/src/java/org/apache/hadoop/hbase/HStoreKey.java @@ -456,7 +456,7 @@ public class HStoreKey implements WritableComparable, HeapSize { return getDelimiter(b, 0, b.length, COLUMN_FAMILY_DELIMITER); } - private static int getRequiredDelimiterInReverse(final byte [] b, + static int getRequiredDelimiterInReverse(final byte [] b, final int offset, final int length, final int delimiter) { int index = getDelimiterInReverse(b, offset, length, delimiter); if (index < 0) { @@ -471,7 +471,7 @@ public class HStoreKey implements WritableComparable, HeapSize { * @return Index of delimiter having started from end of b moving * leftward. */ - private static int getDelimiter(final byte [] b, int offset, final int length, + static int getDelimiter(final byte [] b, int offset, final int length, final int delimiter) { if (b == null) { throw new NullPointerException(); diff --git a/src/java/org/apache/hadoop/hbase/KeyValue.java b/src/java/org/apache/hadoop/hbase/KeyValue.java index d7b4424..2d7904f 100644 --- a/src/java/org/apache/hadoop/hbase/KeyValue.java +++ b/src/java/org/apache/hadoop/hbase/KeyValue.java @@ -545,6 +545,7 @@ public class KeyValue implements Writable, HeapSize { } /** + * Use for logging. * @param b Key portion of a KeyValue. * @param o Offset to start of key * @param l Length of key. @@ -552,7 +553,7 @@ public class KeyValue implements Writable, HeapSize { */ public static String keyToString(final byte [] b, final int o, final int l) { int rowlength = Bytes.toShort(b, o); - String row = Bytes.toString(b, o + Bytes.SIZEOF_SHORT, rowlength); + String row = Bytes.toStringBinary(b, o + Bytes.SIZEOF_SHORT, rowlength); int columnoffset = o + Bytes.SIZEOF_SHORT + 1 + rowlength; int familylength = b[columnoffset - 1]; int columnlength = l - ((columnoffset - o) + TIMESTAMP_TYPE_SIZE); @@ -613,7 +614,7 @@ public class KeyValue implements Writable, HeapSize { } public String getKeyString() { - return Bytes.toString(getBuffer(), getKeyOffset(), getKeyLength()); + return Bytes.toStringBinary(getBuffer(), getKeyOffset(), getKeyLength()); } /** diff --git a/src/java/org/apache/hadoop/hbase/RegionHistorian.java b/src/java/org/apache/hadoop/hbase/RegionHistorian.java index df08ce7..bde8a08 100644 --- a/src/java/org/apache/hadoop/hbase/RegionHistorian.java +++ b/src/java/org/apache/hadoop/hbase/RegionHistorian.java @@ -101,7 +101,7 @@ public class RegionHistorian implements HConstants { * Region name as a string * @return List of RegionHistoryInformation or null if we're offline. 
*/ - public List getRegionHistory(String regionName) { + public List getRegionHistory(byte [] regionName) { if (!isOnline()) { return null; } @@ -115,7 +115,7 @@ public class RegionHistorian implements HConstants { */ for (HistorianColumnKey keyEnu : HistorianColumnKey.values()) { byte[] columnKey = keyEnu.key; - Cell[] cells = this.metaTable.get(Bytes.toBytes(regionName), + Cell[] cells = this.metaTable.get(regionName, columnKey, ALL_VERSIONS); if (cells != null) { for (Cell cell : cells) { diff --git a/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java b/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java index 48143e9..cc26268 100644 --- a/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java +++ b/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java @@ -547,7 +547,7 @@ public class HConnectionManager implements HConstants { for (int tries = 0; true; tries++) { if (tries >= numRetries) { throw new NoServerForRegionException("Unable to find region for " - + Bytes.toString(row) + " after " + numRetries + " tries."); + + Bytes.toStringBinary(row) + " after " + numRetries + " tries."); } try { diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java index 58f6cf9..d600187 100644 --- a/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ b/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -106,9 +106,6 @@ import org.apache.hadoop.io.compress.Decompressor; *

* TODO: Bloomfilters. Need to add hadoop 0.20. first since it has bug fixes * on the hadoop bf package. - * * TODO: USE memcmp by default? Write the keys out in an order that allows - * my using this -- reverse the timestamp. - * TODO: Add support for fast-gzip and for lzo. * TODO: Do scanners need to be able to take a start and end row? * TODO: Should BlockIndex know the name of its file? Should it have a Path * that points at its file say for the case where an index lives apart from @@ -159,7 +156,7 @@ public class HFile { // Name for this object used when logging or in toString. Is either // the result of a toString on stream or else toString of passed file Path. - private String name; + protected String name; // Total uncompressed bytes, maybe calculate a compression ratio later. private int totalBytes = 0; @@ -222,7 +219,7 @@ public class HFile { */ public Writer(FileSystem fs, Path path) throws IOException { - this(fs, path, DEFAULT_BLOCKSIZE, null, null, false); + this(fs, path, DEFAULT_BLOCKSIZE, (Compression.Algorithm) null, null); } /** @@ -241,7 +238,7 @@ public class HFile { this(fs, path, blocksize, compress == null? DEFAULT_COMPRESSION_ALGORITHM: Compression.getCompressionAlgorithmByName(compress), - comparator, false); + comparator); } /** @@ -251,15 +248,13 @@ public class HFile { * @param blocksize * @param compress * @param comparator - * @param bloomfilter * @throws IOException */ public Writer(FileSystem fs, Path path, int blocksize, Compression.Algorithm compress, - final RawComparator comparator, - final boolean bloomfilter) + final RawComparator comparator) throws IOException { - this(fs.create(path), blocksize, compress, comparator, bloomfilter); + this(fs.create(path), blocksize, compress, comparator); this.closeOutputStream = true; this.name = path.toString(); this.path = path; @@ -274,26 +269,22 @@ public class HFile { * @throws IOException */ public Writer(final FSDataOutputStream ostream, final int blocksize, - final String compress, final RawComparator c) + final String compress, final RawComparator c) throws IOException { this(ostream, blocksize, - compress == null? DEFAULT_COMPRESSION_ALGORITHM: - Compression.getCompressionAlgorithmByName(compress), c, false); + Compression.getCompressionAlgorithmByName(compress), c); } - + /** * Constructor that takes a stream. * @param ostream Stream to use. 
* @param blocksize * @param compress * @param c - * @param bloomfilter * @throws IOException */ public Writer(final FSDataOutputStream ostream, final int blocksize, - final Compression.Algorithm compress, - final RawComparator c, - final boolean bloomfilter) + final Compression.Algorithm compress, final RawComparator c) throws IOException { this.outputStream = ostream; this.closeOutputStream = false; @@ -727,11 +718,11 @@ public class HFile { } protected String toStringFirstKey() { - return Bytes.toString(getFirstKey()); + return Bytes.toStringBinary(getFirstKey()); } protected String toStringLastKey() { - return Bytes.toString(getFirstKey()); + return Bytes.toStringBinary(getLastKey()); } public long length() { @@ -1187,7 +1178,7 @@ public class HFile { } public String getKeyString() { - return Bytes.toString(block.array(), block.arrayOffset() + + return Bytes.toStringBinary(block.array(), block.arrayOffset() + block.position(), currKeyLen); } @@ -1238,6 +1229,10 @@ public class HFile { } } } + + public String getTrailerInfo() { + return trailer.toString(); + } } /* * The RFile has a fixed trailer which contains offsets to other variable @@ -1265,11 +1260,9 @@ public class HFile { static int trailerSize() { // Keep this up to date... - final int intSize = 4; - final int longSize = 8; return - ( intSize * 5 ) + - ( longSize * 4 ) + + ( Bytes.SIZEOF_INT * 5 ) + + ( Bytes.SIZEOF_LONG * 4 ) + TRAILERBLOCKMAGIC.length; } diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/HFileStats.java b/src/java/org/apache/hadoop/hbase/io/hfile/HFileStats.java new file mode 100644 index 0000000..19efdae --- /dev/null +++ b/src/java/org/apache/hadoop/hbase/io/hfile/HFileStats.java @@ -0,0 +1,58 @@ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HStoreKey; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Bytes; + +public class HFileStats { + + + public static void main(String []args) throws IOException { + if (args.length < 1) { + System.out.println("usage: HFileStats <path-to-hfile> -- dumps hfile stats"); + return; + } + + HBaseConfiguration conf = new HBaseConfiguration(); + + FileSystem fs = FileSystem.get(conf); + + Path path = new Path(args[0]); + + if (!fs.exists(path)) { + System.out.println("File doesn't exist: " + path); + return; + } + + HFile.Reader reader = new HFile.Reader(fs, path, null); + Map<byte[], byte[]> fileInfo = reader.loadFileInfo(); + + System.out.println("Block index size as per heapsize: " + reader.indexSize()); + //System.out.println("Block index size as per sizeof: " + SizeOf.deepSizeOf(reader.blockIndex)); + System.out.println(reader.toString()); + System.out.println(reader.getTrailerInfo()); + System.out.println("Fileinfo:"); + for ( Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) { + System.out.print(Bytes.toString(e.getKey()) + " = " ); + + if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY"))==0) { + long seqid = Bytes.toLong(e.getValue()); + System.out.println(seqid); + } else { + System.out.println(Bytes.toStringBinary(e.getValue())); + } + + } + + reader.close(); + } +} diff --git a/src/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java b/src/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java index 38d56b8..bbd76f2 100644 ---
a/src/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java +++ b/src/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java @@ -46,6 +46,7 @@ public class TableInputFormat extends TableInputFormatBase implements * wildcards */ public static final String COLUMN_LIST = "hbase.mapred.tablecolumns"; + public static final String SCANNER_SIZE = "hbase.mapred.scanner_size"; public void configure(JobConf job) { Path[] tableNames = FileInputFormat.getInputPaths(job); @@ -57,7 +58,11 @@ public class TableInputFormat extends TableInputFormatBase implements } setInputColumns(m_cols); try { - setHTable(new HTable(new HBaseConfiguration(job), tableNames[0].getName())); + HTable table = new HTable(new HBaseConfiguration(job), tableNames[0].getName()); + int scanSize = job.getInt(SCANNER_SIZE, 0); + if (scanSize > 0) + table.setScannerCaching(scanSize); + setHTable(table); } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); } diff --git a/src/java/org/apache/hadoop/hbase/mapred/TableSplit.java b/src/java/org/apache/hadoop/hbase/mapred/TableSplit.java index 435e2a7..b754b6a 100644 --- a/src/java/org/apache/hadoop/hbase/mapred/TableSplit.java +++ b/src/java/org/apache/hadoop/hbase/mapred/TableSplit.java @@ -103,7 +103,7 @@ public class TableSplit implements InputSplit, Comparable { @Override public String toString() { return m_regionLocation + ":" + - Bytes.toString(m_startRow) + "," + Bytes.toString(m_endRow); + Bytes.toStringBinary(m_startRow) + "," + Bytes.toStringBinary(m_endRow); } public int compareTo(TableSplit o) { diff --git a/src/java/org/apache/hadoop/hbase/master/BaseScanner.java b/src/java/org/apache/hadoop/hbase/master/BaseScanner.java index a2c5e8e..7079aef 100644 --- a/src/java/org/apache/hadoop/hbase/master/BaseScanner.java +++ b/src/java/org/apache/hadoop/hbase/master/BaseScanner.java @@ -316,8 +322,9 @@ abstract class BaseScanner extends Chore implements HConstants { } if (LOG.isDebugEnabled()) { - LOG.debug(split.getRegionNameAsString() + - " no longer has references to " + Bytes.toString(parent)); + LOG.debug(split.getRegionNameAsString() + "/" + split.getEncodedName() + + " no longer has references to " + Bytes.toStringBinary(parent) + ); } BatchUpdate b = new BatchUpdate(parent); diff --git a/src/java/org/apache/hadoop/hbase/master/HMaster.java b/src/java/org/apache/hadoop/hbase/master/HMaster.java index 6e8c613..839bcbb 100644 --- a/src/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/java/org/apache/hadoop/hbase/master/HMaster.java @@ -941,6 +941,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, } // Arguments are regionname and an optional server name. 
byte [] regionname = ((ImmutableBytesWritable)args[0]).get(); + LOG.debug("Attempting to close region: " + Bytes.toStringBinary(regionname)); String servername = null; if (args.length == 2) { servername = Bytes.toString(((ImmutableBytesWritable)args[1]).get()); diff --git a/src/java/org/apache/hadoop/hbase/master/RegionManager.java b/src/java/org/apache/hadoop/hbase/master/RegionManager.java index b50a89c..8129377 100644 --- a/src/java/org/apache/hadoop/hbase/master/RegionManager.java +++ b/src/java/org/apache/hadoop/hbase/master/RegionManager.java @@ -499,7 +499,7 @@ class RegionManager implements HConstants { if (currentRegion.isRootRegion() || currentRegion.isMetaTable()) { continue; } - String regionName = currentRegion.getRegionNameAsString(); + final String regionName = currentRegion.getRegionNameAsString(); if (regionIsInTransition(regionName)) { skipped++; continue; diff --git a/src/java/org/apache/hadoop/hbase/master/TableDelete.java b/src/java/org/apache/hadoop/hbase/master/TableDelete.java index 526fe32..0bde1b1 100644 --- a/src/java/org/apache/hadoop/hbase/master/TableDelete.java +++ b/src/java/org/apache/hadoop/hbase/master/TableDelete.java @@ -45,6 +45,7 @@ class TableDelete extends TableOperation { protected void processScanItem(String serverName, final HRegionInfo info) throws IOException { if (isEnabled(info)) { + LOG.debug("Region still enabled: " + info.toString()); throw new TableNotDisabledException(tableName); } } diff --git a/src/java/org/apache/hadoop/hbase/master/TableOperation.java b/src/java/org/apache/hadoop/hbase/master/TableOperation.java index 2127bd0..857e4d8 100644 --- a/src/java/org/apache/hadoop/hbase/master/TableOperation.java +++ b/src/java/org/apache/hadoop/hbase/master/TableOperation.java @@ -95,7 +95,7 @@ abstract class TableOperation implements HConstants { if (info == null) { emptyRows.add(values.getRow()); LOG.error(Bytes.toString(COL_REGIONINFO) + " not found on " + - Bytes.toString(values.getRow())); + Bytes.toStringBinary(values.getRow())); continue; } String serverAddress = Writables.cellToString(values.get(COL_SERVER)); diff --git a/src/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java b/src/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java index 6e48bb5..b8284be 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java @@ -114,14 +114,14 @@ class CompactSplitThread extends Thread implements HConstants { continue; } catch (IOException ex) { LOG.error("Compaction/Split failed" + - (r != null ? (" for region " + Bytes.toString(r.getRegionName())) : ""), + (r != null ? (" for region " + r.getRegionNameAsString()) : ""), RemoteExceptionHandler.checkIOException(ex)); if (!server.checkFileSystem()) { break; } } catch (Exception ex) { LOG.error("Compaction failed" + - (r != null ? (" for region " + Bytes.toString(r.getRegionName())) : ""), + (r != null ? (" for region " + r.getRegionNameAsString()) : ""), ex); if (!server.checkFileSystem()) { break; @@ -155,7 +155,7 @@ class CompactSplitThread extends Thread implements HConstants { r.setForceMajorCompaction(force); if (LOG.isDebugEnabled()) { LOG.debug("Compaction " + (force? "(major) ": "") + - "requested for region " + Bytes.toString(r.getRegionName()) + + "requested for region " + r.getRegionNameAsString() + "/" + r.getRegionInfo().getEncodedName() + (why != null && !why.isEmpty()? 
" because: " + why: "")); } @@ -210,6 +210,7 @@ class CompactSplitThread extends Thread implements HConstants { // Add new regions to META for (int i = 0; i < newRegions.length; i++) { + LOG.debug("Split region names going into META: " + newRegions[i].getRegionNameAsString()); update = new BatchUpdate(newRegions[i].getRegionName()); update.put(COL_REGIONINFO, Writables.getBytes( newRegions[i].getRegionInfo())); diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java index d3fbb81..5f90615 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -503,6 +503,11 @@ public class HRegion implements HConstants { return this.regionInfo.getRegionName(); } + /** @return region name as string for logging */ + public String getRegionNameAsString() { + return this.regionInfo.getRegionNameAsString(); + } + /** @return HTableDescriptor for this region */ public HTableDescriptor getTableDesc() { return this.regionInfo.getTableDesc(); @@ -1192,6 +1197,7 @@ public class HRegion implements HConstants { Store store = getStore(columnFamily); KeyValue kv = new KeyValue(row, HConstants.LATEST_TIMESTAMP); // get the closest key. (HStore.getRowKeyAtOrBefore can return null) + LOG.debug("getClosestRowBefore looking for: " + Bytes.toStringBinary(row)); key = store.getRowKeyAtOrBefore(kv); if (key == null) { return null; @@ -1204,6 +1210,8 @@ public class HRegion implements HConstants { } store.getFull(kv, null, null, 1, null, results, System.currentTimeMillis()); // Convert to RowResult. TODO: Remove need to do this. + if (results.size() == 0) + LOG.debug("getClosestRowBefore couldn't find for: " + Bytes.toStringBinary(row)); return RowResult.createRowResult(results); } finally { splitsAndClosesLock.readLock().unlock(); @@ -2663,8 +2671,8 @@ public class HRegion implements HConstants { // Pick the latest value out of List c: if (c.size() >= 1) { // Use the memcache timestamp value. - LOG.debug("Overwriting the memcache value for " + Bytes.toString(row) + - "/" + Bytes.toString(column)); +// LOG.debug("Overwriting the memcache value for " + Bytes.toString(row) + +// "/" + Bytes.toString(column)); ts = c.get(0).getTimestamp(); value = c.get(0).getValue(); } @@ -2672,20 +2680,17 @@ public class HRegion implements HConstants { if (value == null) { // Check the store (including disk) for the previous value. 
c = store.get(kv, 1); - if (c != null && c.size() == 1) { - LOG.debug("Using HFile previous value for " + Bytes.toString(row) + - "/" + Bytes.toString(column)); + if (c != null && c.size() >= 1) { +// LOG.debug("Using HFile previous value for " + Bytes.toString(row) + +// "/" + Bytes.toString(column)); value = c.get(0).getValue(); - } else if (c != null && c.size() > 1) { - throw new DoNotRetryIOException("more than 1 value returned in " + - "incrementColumnValue from Store"); } } if (value == null) { // Doesn't exist - LOG.debug("Creating new counter value for " + Bytes.toString(row) + - "/"+ Bytes.toString(column)); + //LOG.debug("Creating new counter value for " + Bytes.toString(row) + + // "/"+ Bytes.toString(column)); value = Bytes.toBytes(amount); } else { if (amount == 0) return Bytes.toLong(value); diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index c77dd57..2aacf53 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -2458,7 +2458,11 @@ public class HRegionServer implements HConstants, HRegionInterface, requestCount.incrementAndGet(); try { HRegion region = getRegion(regionName); - return region.incrementColumnValue(row, column, amount); + long start = System.nanoTime(); + long val = region.incrementColumnValue(row, column, amount); + long time = (System.nanoTime() - start)/1000; // usec + this.metrics.atomicIncrementTime.inc(time); + return val; } catch (IOException e) { checkFileSystem(); throw e; diff --git a/src/java/org/apache/hadoop/hbase/regionserver/Store.java b/src/java/org/apache/hadoop/hbase/regionserver/Store.java index a916686..1e6d128 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/Store.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -526,7 +526,7 @@ public class Store implements HConstants { */ private HFile.Writer getWriter(final Path basedir) throws IOException { return StoreFile.getWriter(this.fs, basedir, this.blocksize, - this.compression, this.comparator.getRawComparator(), this.bloomfilter); + this.compression, this.comparator.getRawComparator()); } /* diff --git a/src/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/src/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index 7940c0c..be4d8a5 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -19,18 +19,11 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Map; -import java.util.Random; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.HBaseConfiguration; @@ -45,6 +38,14 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Hash; import org.apache.hadoop.io.RawComparator; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.regex.Matcher; 
+import java.util.regex.Pattern; + /** * A Store data file. Stores usually have one or more of these files. They * are produced by flushing the memcache to disk. To @@ -84,7 +85,7 @@ public class StoreFile implements HConstants { // If true, this file was product of a major compaction. Its then set // whenever you get a Reader. private AtomicBoolean majorCompaction = null; - + /* * Regex that will work for straight filenames and for reference names. * If reference, then the regex has more than just one group. Group 1 is @@ -269,9 +270,12 @@ public class StoreFile implements HConstants { this.majorCompaction.set(mc); } } + + // TODO read in bloom filter here, ignore if the column family config says + // "no bloom filter" even if there is one in the hfile. return this.reader; } - + /** * Override to add some customization on HFile.Reader */ @@ -384,7 +388,7 @@ public class StoreFile implements HConstants { */ public static HFile.Writer getWriter(final FileSystem fs, final Path dir) throws IOException { - return getWriter(fs, dir, DEFAULT_BLOCKSIZE_SMALL, null, null, false); + return getWriter(fs, dir, DEFAULT_BLOCKSIZE_SMALL, null, null); } /** @@ -397,13 +401,12 @@ public class StoreFile implements HConstants { * @param blocksize * @param algorithm Pass null to get default. * @param c Pass null to get default. - * @param filter BloomFilter * @return HFile.Writer * @throws IOException */ public static HFile.Writer getWriter(final FileSystem fs, final Path dir, - final int blocksize, final Compression.Algorithm algorithm, - final KeyValue.KeyComparator c, final boolean filter) + final int blocksize, final Compression.Algorithm algorithm, + final KeyValue.KeyComparator c) throws IOException { if (!fs.exists(dir)) { fs.mkdirs(dir); @@ -411,7 +414,7 @@ public class StoreFile implements HConstants { Path path = getUniqueFile(fs, dir); return new HFile.Writer(fs, path, blocksize, algorithm == null? HFile.DEFAULT_COMPRESSION_ALGORITHM: algorithm, - c == null? KeyValue.KEY_COMPARATOR: c, filter); + c == null? KeyValue.KEY_COMPARATOR: c); } /** diff --git a/src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index 7f44f73..46cbcfb 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -219,6 +219,7 @@ implements ChangedReadersObserver { (keys[i].getTimestamp() > viableTimestamp)))) { if (ttl == HConstants.FOREVER || now < keys[i].getTimestamp() + ttl) { viable = keys[i]; + viableTimestamp = keys[i].getTimestamp(); } else { if (LOG.isDebugEnabled()) { LOG.debug("getNextViableRow :" + keys[i] + ": expired, skipped"); diff --git a/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java b/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java index 2a723ef..064d2a0 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java @@ -52,6 +52,26 @@ public class RegionServerMetrics implements Updater { new MetricsTimeVaryingRate("atomicIncrementTime", registry); /** + * Block cache size. + */ + public final MetricsLongValue blockCacheSize = new MetricsLongValue("blockCacheSize", registry); + + /** + * Block cache free size. + */ + public final MetricsLongValue blockCacheFree = new MetricsLongValue("blockCacheFree", registry); + + /** + * Block cache item count. 
+ */ + public final MetricsLongValue blockCacheCount = new MetricsLongValue("blockCacheCount", registry); + + /** + * Block hit ratio. + */ + public final MetricsIntValue blockCacheHitRatio = new MetricsIntValue("blockCacheHitRatio", registry); + + /** * Count of regions carried by this regionserver */ public final MetricsIntValue regions = @@ -131,6 +151,7 @@ public class RegionServerMetrics implements Updater { this.storefileIndexSizeMB.pushMetric(this.metricsRecord); this.memcacheSizeMB.pushMetric(this.metricsRecord); this.regions.pushMetric(this.metricsRecord); + this.atomicIncrementTime.pushMetric(this.metricsRecord); this.requests.pushMetric(this.metricsRecord); this.blockCacheSize.pushMetric(this.metricsRecord); diff --git a/src/java/org/apache/hadoop/hbase/util/BloomFilter.java b/src/java/org/apache/hadoop/hbase/util/BloomFilter.java new file mode 100644 index 0000000..2449c81 --- /dev/null +++ b/src/java/org/apache/hadoop/hbase/util/BloomFilter.java @@ -0,0 +1,20 @@ +package org.apache.hadoop.hbase.util; + +/** + * A bloom filter: membership tests can return false positives but never + * false negatives. Implementations keep the bit vector in a byte[] so the + * filter can be sized up front and serialized compactly. + * @see ByteBloomFilter + * @see DynamicByteBloomFilter + */ +public interface BloomFilter { + boolean add(byte []buf, int offset, int len); + + boolean add(byte []buf); + + boolean contains(byte [] buf, int offset, int length); + + int max(); + + double err(); +} diff --git a/src/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java b/src/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java new file mode 100644 index 0000000..512a361 --- /dev/null +++ b/src/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java @@ -0,0 +1,475 @@ +/* + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Bloom Filter. + * + * Implemented using a byte[] as the bit vector. + * + * First version has an emphasis on memory efficiency NOT performance.
+ * + */ + +package org.apache.hadoop.hbase.util; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.DataOutput; +import java.io.DataInputStream; +import java.io.DataInput; +import java.io.IOException; +import java.lang.Math; +import java.nio.ByteBuffer; +import java.util.Random; + +import org.apache.commons.cli2.validation.InvalidArgumentException; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.hbase.io.HeapSize; +import org.onelab.filter.HashFunction; + +public class ByteBloomFilter implements BloomFilter, HeapSize, Writable { + /** Bits in the array */ + private int m; + /** Bytes in the array */ + private int b; + /** Number of elements */ + private int n; + /** Number of hash functions */ + private int k; + /** Error rate */ + private double err; + /** Maximum elements */ + private int max; + /** Bloom bits */ + private byte [] bits; + /** Heap size */ + private long heapSize; + + private int hashType; // wah. + private HashFunction hash; + + /** Bit-value lookup array to prevent doing the same work over and over */ + private static final byte [] bitvals = { + (byte) 0x01, + (byte) 0x02, + (byte) 0x04, + (byte) 0x08, + (byte) 0x10, + (byte) 0x20, + (byte) 0x40, + (byte) 0x80 + }; + + /** + * Default constructor. Instantiate a Bloom filter that will support + * up to specified number of elements with the specified false-positive rate. + * + * @param hashType from Hash - one of JENKINS_HASH or MURMUR_HASH. + * @param max maximum number of supported elements + * @param err accepted false-positive rate + */ + public ByteBloomFilter(int hashType, int max, double err) { + this.max = max; + this.err = err; + + m = (int)Math.ceil(max * (Math.log(err) / Math.log(0.6185))); + k = (int)Math.ceil(Math.log(2) * (m / max)); + b = (int)Math.ceil((float)m / (float)8); + n = 0; + bits = new byte[b]; + heapSize = (6 * HeapSize.INT) + HeapSize.DOUBLE + HeapSize.BYTE_ARRAY + + bits.length; + System.out.println("Bloom filter initialized, " + m + " bits, " + b + + " bytes, " + k + " hashes, " + err+ " error rate, up to " + max + + " elements"); + + initHash(hashType); + } + + private void initHash(int hashType) { + this.hashType = hashType; + hash = new HashFunction(m, k, hashType); + } + + /** + * Full constructor. Instantiates a fully-specified Bloom filter. + * + * @param hashType hash function used + * @param m bits in the array + * @param b length of the backing byte array + * @param n number of elements in the bloom + * @param k number of hash functions + * @param err false positive probability + * @param max maximum number of elements bloom can contain + * @param bits byte array of size b + */ + public ByteBloomFilter(int hashType, int m, int b, int n, int k, double err, + int max, byte [] bits) { + this.m = m; + this.b = b; + this.n = n; + this.k = k; + this.err = err; + this.max = max; + this.bits = bits; + int bitlen = 0; + if(bits != null) bitlen = bits.length; + heapSize = (6 * HeapSize.INT) + HeapSize.DOUBLE + HeapSize.ARRAY + bitlen; + + initHash(hashType); + } + + //--------------------------------------------------------------------------- + /** + * Add the specified long to the bloom filter. + * + * @param id long to be added to the bloom + * @return true + */ + public boolean add(long id) { + return add(Bytes.toBytes(id)); + } + + /** + * Add the specified binary to the bloom filter. 
+ * + * @param buf data to be added to the bloom + * @param offset offset into the data to be added + * @param len length of the data to be added + * @return true on success, false on failure + */ + public boolean add(byte [] buf, int offset, int len) { + if(offset + len > buf.length) return false; + + int [] h = hash.hash(buf, offset, len); + for (int i : h ) { + set(i); + } + return true; + } + + /** + * Adds a ByteBuffer - from position=0 -> limit + * + * @param buf bytes to add + * @return true on success, false on failure + */ + public boolean add(ByteBuffer buf) { + return add(buf.array(), buf.arrayOffset(), buf.limit()); + } + + /** + * Add the specified binary to the bloom filter. + * + * @param buf data to be added to the bloom + * @return true on success, false on failure + */ + public boolean add(byte [] buf) { + return add(buf, 0, buf.length); + } + + /** + * Check if the specified long is contained in the bloom filter. + * + * @param id long to check for existence of + * @return true if found in bloom, false if not + */ + public boolean contains(long id) { + return contains(Bytes.toBytes(id)); + } + + /** + * Check if the specified binary is contained in the bloom filter. + * + * @param buf data to check for existence of + * @return true if found in bloom, false if not + */ + public boolean contains(byte [] buf) { + return contains(buf, 0, buf.length); + } + + /** + * Check if the specified binary is contained in the bloom filter. + * + * @param buf data to check for existence of + * @param offset offset into the data + * @param length length of the data + * @return true if found in bloom, false if not + */ + public boolean contains(byte [] buf, int offset, int length) { + if(offset + length > buf.length) return false; + int [] h = hash.hash(buf, offset, length); + for (int i : h) { + if ( !get(i) ) + return false; + } + return true; + } + + //--------------------------------------------------------------------------- + /** Simple accessors/setters */ + + /** + * Return the number of elements in the bloom. + * + * @return number of elements + */ + public int size() { + return n; + } + + /** + * Set the number of elements in the bloom. + * + * @param size number of elements + */ + public void setSize(int size) { + n = size; + } + + /** + * Return the maximum number of elements allowed in the bloom. + * + * @return max elements allowed + */ + public int max(){ + return max; + } + + /** + * Return the false-positive error rate of the bloom. + * + * This is the false-positive probability when the number of elements + * in the bloom equals the maximum number of elements allowed. + * + * @return error rate + */ + public double err(){ + return err; + } + + /** + * Returns the byte array that backs the bloom bit vector. + * + * @return the backing byte array + */ + public byte [] getBits(){ + return bits; + } + + /** + * Sets the byte array that back the bloom bit-vector. + * + * @param bits the bit-vector byte array + */ + public void setBits(byte [] bits) { + this.bits = bits; + heapSize = (6 * HeapSize.INT) + HeapSize.DOUBLE + HeapSize.ARRAY + + bits.length; + } + + /** + * Returns the number of bits in the bloom bit-vector. + * + * @return number of bits + */ + public int numBits() { + return m; + } + + /** + * Returns the number of bytes in the backing byte array. + * + * @return number of bytes + */ + public int numBytes() { + return b; + } + + /** + * Returns the number of hash functions used for the bloom filter. 
+ * + * @return number of hashes + */ + public int numHashes() { + return k; + } + + /** + * Returns a String representation of the bit-vector. + * + * @return string representation of bits + */ + public String toString(){ + String ret = ""; + for(int i=0;i= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') + || ch == ',' + || ch == '_' + || ch == '-' + || ch == ':' + || ch == '.') { + result += first.charAt(i); + } else { + result += String.format("\\x%02X", ch); + } + } + } catch ( UnsupportedEncodingException e) { + e.printStackTrace(); + } + return result; + } + + private static boolean isHexDigit(char c) { + return + (c >= 'A' && c <= 'F') || + (c >= '0' && c <= '9'); + } + + /** + * Takes a ASCII digit in the range A-F0-9 and returns + * the corresponding integer/ordinal value. + * @param ch + * @return + */ + public static byte toBinaryFromHex(byte ch) { + if ( ch >= 'A' && ch <= 'F' ) + return (byte) ((byte)10 + (byte) (ch - 'A')); + // else + return (byte) (ch - '0'); + } + + public static byte [] toBytesBinary(String in) { + // this may be bigger than we need, but lets be safe. + byte [] b = new byte[in.length()]; + int size = 0; + for (int i = 0; i < in.length(); ++i) { + char ch = in.charAt(i); + if (ch == '\\') { + // begin hex escape: + char next = in.charAt(i+1); + if (next != 'x') { + // invalid escape sequence, ignore this one. + b[size++] = (byte)ch; + continue; + } + // ok, take next 2 hex digits. + char hd1 = in.charAt(i+2); + char hd2 = in.charAt(i+3); + + // they need to be A-F0-9: + if ( ! isHexDigit(hd1) || + ! isHexDigit(hd2) ) { + // bogus escape code, ignore: + continue; + } + // turn hex ASCII digit -> number + byte d = (byte) ((toBinaryFromHex((byte)hd1) << 4) + toBinaryFromHex((byte)hd2)); + + b[size++] = d; + i += 3; // skip 3 + } else { + b[size++] = (byte) ch; + } + } + // resize: + byte [] b2 = new byte[size]; + System.arraycopy(b, 0, b2, 0, size); + return b2; + } + /** * Converts a string to a UTF-8 byte array. * @param s diff --git a/src/java/org/apache/hadoop/hbase/util/DynamicByteBloomFilter.java b/src/java/org/apache/hadoop/hbase/util/DynamicByteBloomFilter.java new file mode 100644 index 0000000..7f9a49e --- /dev/null +++ b/src/java/org/apache/hadoop/hbase/util/DynamicByteBloomFilter.java @@ -0,0 +1,120 @@ +/* + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.io.Writable; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.DataInput; + +public class DynamicByteBloomFilter implements HeapSize, Writable, BloomFilter { + + // Max count of each sub-bloom filter. 
+ private int max; + // Current count of the current sub-bloom filter. + private int currentN; + + // Hash type for new blooms. + private int hashType; + // The max acceptable error rate. + private double err; + + private ByteBloomFilter [] blooms; + + public DynamicByteBloomFilter() { } + + /** + * Build a dynamic bloom filter. + * @param hashType The hash type to use, {@link Hash} + * @param max + * @param err + */ + public DynamicByteBloomFilter(int hashType, int max, double err) { + this.hashType = hashType; + this.max = max; + this.err = err; + + blooms = new ByteBloomFilter[1]; + blooms[0] = new ByteBloomFilter(this.hashType, this.max, this.err); + } + + public boolean add(byte []buf, int offset, int len) { + ByteBloomFilter bf = getCurrentFilter(); + + if (bf == null) { + addBloom(); + bf = blooms[blooms.length - 1]; + this.currentN = 0; + } + this.currentN++; + return bf.add(buf, offset, len); + } + + private void addBloom() { + ByteBloomFilter [] n = new ByteBloomFilter[blooms.length+1]; + System.arraycopy(blooms, 0, n, 0, blooms.length); + blooms = n; + blooms[blooms.length - 1] = new ByteBloomFilter(this.hashType, this.max, this.err); + } + + private ByteBloomFilter getCurrentFilter() { + if (this.currentN > this.max) + return null; + + // stack, right? + return blooms[blooms.length - 1]; + } + + public boolean add(byte []buf) { + return add(buf, 0, buf.length); + } + + public boolean contains(byte [] buf, int offset, int length) { + return false; + } + + int size() { + return this.currentN; + } + + public int max() { + return this.max; + } + + public double err() { + return this.err; + } + + + public long heapSize() { + return 0; + } + + public void write(DataOutput out) throws IOException { + + } + + public void readFields(DataInput in) throws IOException { + + } +} diff --git a/src/java/org/apache/hadoop/hbase/util/Hash.java b/src/java/org/apache/hadoop/hbase/util/Hash.java index d5a5e8a..704a275 100644 --- a/src/java/org/apache/hadoop/hbase/util/Hash.java +++ b/src/java/org/apache/hadoop/hbase/util/Hash.java @@ -93,7 +93,7 @@ public abstract class Hash { * @return hash value */ public int hash(byte[] bytes) { - return hash(bytes, bytes.length, -1); + return hash(bytes, 0, bytes.length, -1); } /** @@ -104,7 +104,7 @@ public abstract class Hash { * @return hash value */ public int hash(byte[] bytes, int initval) { - return hash(bytes, bytes.length, initval); + return hash(bytes, 0, bytes.length, initval); } /** @@ -115,5 +115,5 @@ public abstract class Hash { * @param initval seed value * @return hash value */ - public abstract int hash(byte[] bytes, int length, int initval); + public abstract int hash(byte[] bytes, int pos, int length, int initval); } diff --git a/src/java/org/apache/hadoop/hbase/util/JenkinsHash.java b/src/java/org/apache/hadoop/hbase/util/JenkinsHash.java index a34a9e8..58546b3 100644 --- a/src/java/org/apache/hadoop/hbase/util/JenkinsHash.java +++ b/src/java/org/apache/hadoop/hbase/util/JenkinsHash.java @@ -80,11 +80,11 @@ public class JenkinsHash extends Hash { */ @Override @SuppressWarnings("fallthrough") - public int hash(byte[] key, int nbytes, int initval) { + public int hash(byte[] key, int offset, int nbytes, int initval) { int length = nbytes; long a, b, c; // We use longs because we don't have unsigned ints a = b = c = (0x00000000deadbeefL + length + initval) & INT_MASK; - int offset = 0; + //int offset = 0; for (; length > 12; offset += 12, length -= 12) { a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; @@ -213,7
+213,7 @@ public class JenkinsHash extends Hash { * 12 14 25 16 4 14 24 * and these came close: * 4 8 15 26 3 22 24 - * 10 8 15 26 3 22 24 +0 * 10 8 15 26 3 22 24 * 11 8 15 26 3 22 24 * * #define final(a,b,c) \ @@ -254,7 +254,7 @@ public class JenkinsHash extends Hash { int value = 0; JenkinsHash hash = new JenkinsHash(); for (int length = in.read(bytes); length > 0 ; length = in.read(bytes)) { - value = hash.hash(bytes, length, value); + value = hash.hash(bytes, 0, length, value); } System.out.println(Math.abs(value)); } diff --git a/src/java/org/apache/hadoop/hbase/util/MurmurHash.java b/src/java/org/apache/hadoop/hbase/util/MurmurHash.java index 72504af..01e43fb 100644 --- a/src/java/org/apache/hadoop/hbase/util/MurmurHash.java +++ b/src/java/org/apache/hadoop/hbase/util/MurmurHash.java @@ -33,7 +33,7 @@ public class MurmurHash extends Hash { } @Override - public int hash(byte[] data, int length, int seed) { + public int hash(byte[] data, int pos, int length, int seed) { int m = 0x5bd1e995; int r = 24; @@ -63,13 +63,13 @@ public class MurmurHash extends Hash { if (left != 0) { if (left >= 3) { - h ^= data[length - 3] << 16; + h ^= data[pos + length - 3] << 16; } if (left >= 2) { - h ^= data[length - 2] << 8; + h ^= data[pos + length - 2] << 8; } if (left >= 1) { - h ^= data[length - 1]; + h ^= data[pos + length - 1]; } h *= m; diff --git a/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java b/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java index 54afb1f..ef929f2 100644 --- a/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java +++ b/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java @@ -33,6 +33,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.util.Bytes; /** @@ -187,7 +188,7 @@ public class HFilePerformanceEvaluation { @Override void setUp() throws Exception { - writer = new HFile.Writer(this.fs, this.mf, RFILE_BLOCKSIZE, null, null); + writer = new HFile.Writer(this.fs, this.mf, RFILE_BLOCKSIZE, (Compression.Algorithm) null, null); } @Override diff --git a/src/test/org/apache/hadoop/hbase/TestTable.java b/src/test/org/apache/hadoop/hbase/TestTable.java index 196c069..4f1159e 100644 --- a/src/test/org/apache/hadoop/hbase/TestTable.java +++ b/src/test/org/apache/hadoop/hbase/TestTable.java @@ -109,8 +109,8 @@ public class TestTable extends HBaseClusterTestCase { } // All threads are now dead. Count up how many tables were created and // how many failed w/ appropriate exception. 
- assertTrue(successes.get() == 1); - assertTrue(failures.get() == (count - 1)); + assertEquals(1, successes.get()); + assertEquals(count - 1, failures.get()); } /** diff --git a/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java index ed589f8..a1ad2f9 100644 --- a/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java +++ b/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java @@ -114,7 +114,7 @@ public class TestHFile extends HBaseTestCase { Path ncTFile = new Path(ROOT_DIR, "basic.hfile"); FSDataOutputStream fout = createFSOutput(ncTFile); Writer writer = new Writer(fout, minBlockSize, - Compression.getCompressionAlgorithmByName(codec), null, false); + Compression.getCompressionAlgorithmByName(codec), null); LOG.info(writer); writeRecords(writer); fout.close(); @@ -178,7 +178,7 @@ public class TestHFile extends HBaseTestCase { Path mFile = new Path(ROOT_DIR, "meta.hfile"); FSDataOutputStream fout = createFSOutput(mFile); Writer writer = new Writer(fout, minBlockSize, - Compression.getCompressionAlgorithmByName(compress), null, false); + Compression.getCompressionAlgorithmByName(compress), null); someTestingWithMetaBlock(writer); writer.close(); fout.close(); @@ -204,7 +204,7 @@ public class TestHFile extends HBaseTestCase { Path mFile = new Path(ROOT_DIR, "nometa.hfile"); FSDataOutputStream fout = createFSOutput(mFile); Writer writer = new Writer(fout, minBlockSize, - Compression.Algorithm.NONE, null, false); + Compression.Algorithm.NONE, null); writer.append("foo".getBytes(), "value".getBytes()); writer.close(); fout.close(); @@ -226,7 +226,7 @@ public class TestHFile extends HBaseTestCase { public void testComparator() throws IOException { Path mFile = new Path(ROOT_DIR, "meta.tfile"); FSDataOutputStream fout = createFSOutput(mFile); - Writer writer = new Writer(fout, minBlockSize, null, + Writer writer = new Writer(fout, minBlockSize, (Compression.Algorithm) null, new RawComparator() { @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, @@ -238,7 +238,7 @@ public class TestHFile extends HBaseTestCase { public int compare(byte[] o1, byte[] o2) { return compare(o1, 0, o1.length, o2, 0, o2.length); } - }, false); + }); writer.append("3".getBytes(), "0".getBytes()); writer.append("2".getBytes(), "0".getBytes()); writer.append("1".getBytes(), "0".getBytes()); diff --git a/src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java b/src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java index 9f6601b..fd8d6b9 100644 --- a/src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java +++ b/src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java @@ -19,9 +19,6 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.nio.ByteBuffer; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; @@ -34,6 +31,10 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hdfs.MiniDFSCluster; + +import java.io.IOException; +import java.nio.ByteBuffer; + /** * Test HStoreFile */ @@ -71,11 +72,11 @@ public class TestStoreFile extends HBaseTestCase { // Make up a directory hierarchy that has a regiondir and familyname. 
HFile.Writer writer = StoreFile.getWriter(this.fs, new Path(new Path(this.testDir, "regionname"), "familyname"), - 2 * 1024, null, null, false); + 2 * 1024, null, null); writeStoreFile(writer); checkHalfHFile(new StoreFile(this.fs, writer.getPath(), conf)); } - + /* * Writes HStoreKey and ImmutableBytes data to passed writer and * then closes it. @@ -110,7 +111,7 @@ public class TestStoreFile extends HBaseTestCase { Path dir = new Path(storedir, "1234567890"); // Make a store file and write data to it. HFile.Writer writer = StoreFile.getWriter(this.fs, dir, 8 * 1024, null, - null, false); + null); writeStoreFile(writer); StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf); HFile.Reader reader = hsf.getReader(); diff --git a/src/test/org/apache/hadoop/hbase/util/TestByteBloomFilter.java b/src/test/org/apache/hadoop/hbase/util/TestByteBloomFilter.java new file mode 100644 index 0000000..49c85e7 --- /dev/null +++ b/src/test/org/apache/hadoop/hbase/util/TestByteBloomFilter.java @@ -0,0 +1,79 @@ +/* + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import junit.framework.TestCase; + +public class TestByteBloomFilter extends TestCase { + + public void testBloom() throws Exception { + ByteBloomFilter bf1 = new ByteBloomFilter(Hash.MURMUR_HASH, 1000,0.01); + ByteBloomFilter bf2 = new ByteBloomFilter(Hash.MURMUR_HASH, 1000,0.01); + + long key1 = 149373613430716241L; + long key2 = 149373613430716242L; + + bf1.add(key1); + bf2.add(key2); + + assertTrue(bf1.contains(key1)); + assertFalse(bf1.contains(key2)); + assertFalse(bf2.contains(key1)); + assertTrue(bf2.contains(key2)); + + ByteBloomFilter bfnew = ByteBloomFilter.combineBlooms(bf1,bf2,1); + + assertTrue( bfnew.contains(key1) ); + assertTrue( bfnew.contains(key2) ); + + byte [] bkey = {1,2,3,4}; + byte [] bval = "this is a much larger byte array".getBytes(); + + bfnew.add(bkey); + bfnew.add(bval, 1, 20); + + assertTrue( bfnew.contains(bkey) ); + assertTrue( bfnew.contains(bval, 1, 20) ); + assertFalse( bfnew.contains(bval) ); + assertFalse( bfnew.contains(bval, 0, 10) ); + + System.out.println("Bloom bit vector contains " + + bfnew.numBits() + " bits"); + + byte [] ser = ByteBloomFilter.toBytes(bfnew); + + System.out.println("Serialized as " + ser.length + " bytes"); + + System.out.println("HeapSize is " + bfnew.heapSize()); + + // test serialization/deserialized. 
+ ByteBloomFilter deserBf = ByteBloomFilter.toBloom(ser); + + assertTrue( deserBf.contains(bkey) ); + assertTrue( deserBf.contains(bval, 1, 20) ); + assertFalse( deserBf.contains(bval) ); + assertFalse( deserBf.contains(bval, 0, 10) ); + + assertTrue( deserBf.contains(key1) ); + assertTrue( deserBf.contains(key2) ); + } + +} diff --git a/src/test/org/apache/hadoop/hbase/util/TestBytes.java b/src/test/org/apache/hadoop/hbase/util/TestBytes.java index e919d5f..c092791 100644 --- a/src/test/org/apache/hadoop/hbase/util/TestBytes.java +++ b/src/test/org/apache/hadoop/hbase/util/TestBytes.java @@ -114,6 +114,15 @@ public class TestBytes extends TestCase { assertEquals(5, Bytes.binarySearch(arr, key3, 1, 1, Bytes.BYTES_RAWCOMPARATOR)); } + + public void testToStringBinary() { + byte [] b = {5,0,0,(byte)0xFF, 0x3C, 'A', ',', '-', '_', '.', ':'}; + String r = Bytes.toStringBinary(b); + assertEquals("\\x05\\x00\\x00\\xFF\\x3CA,-_.:", r); + + // make sure we can go backwards too. + assertTrue(Bytes.compareTo(Bytes.toBytesBinary(r), b) == 0); + } public void testIncrementBytes() throws IOException { diff --git a/src/test/org/apache/hadoop/hbase/util/TestMurmurHash.java b/src/test/org/apache/hadoop/hbase/util/TestMurmurHash.java new file mode 100644 index 0000000..6ac265c --- /dev/null +++ b/src/test/org/apache/hadoop/hbase/util/TestMurmurHash.java @@ -0,0 +1,94 @@ +/* + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import junit.framework.TestCase; + +public class TestMurmurHash extends TestCase { + + + public void testHash() { + /** + * So this code is very "magic" value test... changing the + * hash function in any way will break tons of things, so that is ok. + * + * I originally got these values, then refactored the hash function + * to take an offset, and I wanted a good test that make sure + * the before/after matched. It does. 
+     */
+    MurmurHash hash = new MurmurHash();
+
+    int seed = 0;
+    byte [] data1 = Bytes.toBytes("abc");
+    byte [] data2 = Bytes.toBytes("def");
+    byte [] data3 = Bytes.toBytes("zxy");
+    byte [] data4 = Bytes.toBytes("laskdjfadlkjaasdfasdf");
+
+    assertEquals(1775488903, hash.hash(data1, 0, data1.length, seed));
+    assertEquals(-1874964424, hash.hash(data2, 0, data2.length, seed));
+    assertEquals(-32398457, hash.hash(data3, 0, data3.length, seed));
+    assertEquals(-969272041, hash.hash(data4, 0, data4.length, seed));
+
+    seed = 2;
+
+    assertEquals(423738406, hash.hash(data1, 0, data1.length, seed));
+    assertEquals(1220541287, hash.hash(data2, 0, data2.length, seed));
+    assertEquals(-396470171, hash.hash(data3, 0, data3.length, seed));
+    assertEquals(-320704939, hash.hash(data4, 0, data4.length, seed));
+
+    seed = 42;
+
+    assertEquals(2068500674, hash.hash(data1, 0, data1.length, seed));
+    assertEquals(-114446167, hash.hash(data2, 0, data2.length, seed));
+    assertEquals(35177023, hash.hash(data3, 0, data3.length, seed));
+    assertEquals(-339881049, hash.hash(data4, 0, data4.length, seed));
+
+    // test some offsets now that are similar to the previous data.
+    seed = 0;
+    data1 = Bytes.toBytes("1abc");                       // 1
+    data2 = Bytes.toBytes("12def");                      // 2
+    data3 = Bytes.toBytes("123zxy");                     // 3
+    data4 = Bytes.toBytes("0123laskdjfadlkjaasdfasdf");  // 4
+
+    assertEquals(1775488903, hash.hash(data1, 1, data1.length - 1, seed));
+    assertEquals(-1874964424, hash.hash(data2, 2, data2.length - 2, seed));
+    assertEquals(-32398457, hash.hash(data3, 3, data3.length - 3, seed));
+    assertEquals(-969272041, hash.hash(data4, 4, data4.length - 4, seed));
+
+    seed = 2;
+
+    assertEquals(423738406, hash.hash(data1, 1, data1.length - 1, seed));
+    assertEquals(1220541287, hash.hash(data2, 2, data2.length - 2, seed));
+    assertEquals(-396470171, hash.hash(data3, 3, data3.length - 3, seed));
+    assertEquals(-320704939, hash.hash(data4, 4, data4.length - 4, seed));
+
+    seed = 42;
+
+    assertEquals(2068500674, hash.hash(data1, 1, data1.length - 1, seed));
+    assertEquals(-114446167, hash.hash(data2, 2, data2.length - 2, seed));
+    assertEquals(35177023, hash.hash(data3, 3, data3.length - 3, seed));
+    assertEquals(-339881049, hash.hash(data4, 4, data4.length - 4, seed));
+  }
+
+}
diff --git a/src/webapps/master/master.jsp b/src/webapps/master/master.jsp
index c5dcb6f..7caea65 100644
--- a/src/webapps/master/master.jsp
+++ b/src/webapps/master/master.jsp
@@ -160,7 +160,7 @@ $(document).ready(function(){
    Arrays.sort(serverNames);
    for (String serverName: serverNames) {
      HServerInfo hsi = serverToServerInfos.get(serverName);
-     String hostname = hsi.getName() + ":" + hsi.getInfoPort();
+     String hostname = hsi.getServerAddress().getInetSocketAddress().getAddress().getHostAddress() + ":" + hsi.getInfoPort();
      String url = "http://" + hostname + "/";
      totalRegions += hsi.getLoad().getNumberOfRegions();
      totalRequests += hsi.getLoad().getNumberOfRequests() / interval;
diff --git a/src/webapps/master/regionhistorian.jsp b/src/webapps/master/regionhistorian.jsp
index efbc99f..ff150e3 100644
--- a/src/webapps/master/regionhistorian.jsp
+++ b/src/webapps/master/regionhistorian.jsp
@@ -5,10 +5,12 @@
   import="org.apache.hadoop.hbase.RegionHistorian"
   import="org.apache.hadoop.hbase.master.HMaster"
   import="org.apache.hadoop.hbase.RegionHistorian.RegionHistoryInformation"
-  import="org.apache.hadoop.hbase.HConstants"%><%
-  String regionName = request.getParameter("regionname");
+  import="org.apache.hadoop.hbase.HConstants"%>
+<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
+<%
+  String regionName = request.getParameter("regionname");
   HMaster master = (HMaster)getServletContext().getAttribute(HMaster.MASTER);
-  List informations = RegionHistorian.getInstance().getRegionHistory(regionName);
+  List informations = RegionHistorian.getInstance().getRegionHistory(Bytes.toBytesBinary(regionName));
   // Pattern used so we can wrap a regionname in an href.
   Pattern pattern = Pattern.compile(RegionHistorian.SPLIT_PREFIX + "(.*)$");
 %>
diff --git a/src/webapps/master/table.jsp b/src/webapps/master/table.jsp
index 45cb3b8..e3cbc21 100644
--- a/src/webapps/master/table.jsp
+++ b/src/webapps/master/table.jsp
@@ -15,6 +15,8 @@
   import="org.apache.hadoop.hbase.master.HMaster"
   import="org.apache.hadoop.hbase.master.MetaRegion"
   import="org.apache.hadoop.hbase.util.Bytes"
+  import="java.io.IOException"
+  import="java.util.Map"
   import="org.apache.hadoop.hbase.HConstants"%><%
   HMaster master = (HMaster)getServletContext().getAttribute(HMaster.MASTER);
   HBaseConfiguration conf = master.getConfiguration();
@@ -38,7 +40,6 @@
 if ( action != null ) {
 %>
-
@@ -132,17 +133,17 @@
         hriEntry.getValue()).getInfoPort();

       String urlRegionHistorian =
-        "/regionhistorian.jsp?regionname=" +
-        URLEncoder.encode(hriEntry.getKey().getRegionNameAsString(), "UTF-8");
+        "/regionhistorian.jsp?regionname="+
+        Bytes.toStringBinary(hriEntry.getKey().getRegionName());

       String urlRegionServer =
         "http://" + hriEntry.getValue().getHostname().toString() + ":" + infoPort + "/";
 %>
-  <%= hriEntry.getKey().getRegionNameAsString()%>
+  <%= Bytes.toStringBinary(hriEntry.getKey().getRegionName())%>
   <%= hriEntry.getValue().toString() %>
-  <%= hriEntry.getKey().getEncodedName()%> <%= Bytes.toString(hriEntry.getKey().getStartKey())%>
-  <%= Bytes.toString(hriEntry.getKey().getEndKey())%>
+  <%= hriEntry.getKey().getEncodedName()%> <%= Bytes.toStringBinary(hriEntry.getKey().getStartKey())%>
+  <%= Bytes.toStringBinary(hriEntry.getKey().getEndKey())%>
 <% } %>
diff --git a/src/webapps/regionserver/regionserver.jsp b/src/webapps/regionserver/regionserver.jsp
index be1e60f..1da1682 100644
--- a/src/webapps/regionserver/regionserver.jsp
+++ b/src/webapps/regionserver/regionserver.jsp
@@ -47,7 +47,7 @@
    HServerLoad.RegionLoad load = regionServer.createRegionLoad(r.getRegionName());
 %>
 <%= r.getRegionNameAsString() %><%= r.getEncodedName() %>
-<%= Bytes.toString(r.getStartKey()) %><%= Bytes.toString(r.getEndKey()) %>
+<%= Bytes.toStringBinary(r.getStartKey()) %><%= Bytes.toStringBinary(r.getEndKey()) %>
 <%= load.toString() %>
 <% } %>
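Note (not part of the patch): the web UI changes above all depend on the same escaping round trip. Region names and keys may contain arbitrary bytes, so they are rendered with Bytes.toStringBinary and parsed back with Bytes.toBytesBinary, as regionhistorian.jsp now does with its regionname parameter. The snippet below is a minimal illustrative sketch of that round trip; the class name RoundTripExample and the sample region name are made up for illustration.

import org.apache.hadoop.hbase.util.Bytes;

public class RoundTripExample {
  public static void main(String[] args) {
    // A region name containing non-printable bytes; Bytes.toString() would
    // render these unreadably and the result could not be pasted back into
    // a URL or shell intact.
    byte [] regionName = {'t', 'a', 'b', 'l', 'e', ',', 0x00, (byte) 0xFF, ',', '1'};

    // toStringBinary() keeps printable characters and escapes the rest as \xNN.
    String printable = Bytes.toStringBinary(regionName);
    System.out.println(printable);   // table,\x00\xFF,1

    // toBytesBinary() undoes the escaping, recovering the original bytes,
    // which is how regionhistorian.jsp turns the request parameter back into
    // a region name it can look up.
    byte [] roundTripped = Bytes.toBytesBinary(printable);
    System.out.println(Bytes.compareTo(regionName, roundTripped) == 0); // true
  }
}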