>,
}
/**
+ * Create a new Cell with a given value and timestamp. Used by HStore.
+ *
+ * @param bb
+ * @param timestamp
+ */
+ public Cell(final ByteBuffer bb, long timestamp) {
+ this.valueMap.put(timestamp, Bytes.toBytes(bb));
+ }
+
+ /**
* @param vals
* array of values
* @param ts
diff --git a/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java b/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java
index c627ec2..2177dbe 100644
--- a/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java
+++ b/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java
@@ -72,10 +72,10 @@ public class HBaseMapFile extends MapFile {
public HBaseReader(FileSystem fs, String dirName, Configuration conf,
boolean blockCacheEnabled, HRegionInfo hri)
throws IOException {
- super(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri),
+ super(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(),
conf, false); // defer opening streams
this.blockCacheEnabled = blockCacheEnabled;
- open(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri), conf);
+ open(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(), conf);
// Force reading of the mapfile index by calling midKey. Reading the
// index will bring the index into memory over here on the client and
@@ -121,7 +121,7 @@ public class HBaseMapFile extends MapFile {
public HBaseWriter(Configuration conf, FileSystem fs, String dirName,
SequenceFile.CompressionType compression, final HRegionInfo hri)
throws IOException {
- super(conf, fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri),
+ super(conf, fs, dirName, new HStoreKey.HStoreKeyWritableComparator(),
VALUE_CLASS, compression);
// Default for mapfiles is 128. Makes random reads faster if we
// have more keys indexed and we're not 'next'-ing around in the
diff --git a/src/java/org/apache/hadoop/hbase/io/HalfHFileReader.java b/src/java/org/apache/hadoop/hbase/io/HalfHFileReader.java
new file mode 100644
index 0000000..7794cfa
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/io/HalfHFileReader.java
@@ -0,0 +1,198 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * A facade for a {@link org.apache.hadoop.hbase.io.hfile.HFile.Reader} that serves up
+ * either the top or bottom half of an HFile where 'bottom' is the first half
+ * of the file containing the keys that sort lowest and 'top' is the second half
+ * of the file with keys that sort greater than those of the bottom half.
+ * The top includes the split file's midkey, or the key that follows if it does
+ * not exist in the file.
+ *
+ * This type works in tandem with the {@link Reference} type. This class
+ * is used for reading while Reference is used for writing.
+ *
+ * <p>This file is not splittable. Calls to {@link #midKey()} return null.
+ */
+public class HalfHFileReader extends HFile.Reader {
+ static final Log LOG = LogFactory.getLog(HalfHFileReader.class);
+ private final boolean top;
+ // This is the key we split around. It's the first possible entry on a row:
+ // i.e. empty column and a timestamp of LATEST_TIMESTAMP.
+ private final byte [] splitkey;
+
+ /**
+ * @param fs
+ * @param p
+ * @param c
+ * @param r
+ * @throws IOException
+ */
+ public HalfHFileReader(final FileSystem fs, final Path p, final BlockCache c,
+ final Reference r)
+ throws IOException {
+ super(fs, p, c);
+ // This is not the actual midkey for this half-file; it's just the border
+ // around which we split top and bottom. Have to look in files to find
+ // actual last and first keys for bottom and top halves. Half-files don't
+ // have an actual midkey themselves. No midkey is how we indicate file is
+ // not splittable.
+ this.splitkey = r.getSplitKey();
+ // Is it top or bottom half?
+ this.top = Reference.isTopFileRegion(r.getFileRegion());
+ }
+
+ public HFileScanner getScanner() {
+ final HFileScanner s = super.getScanner();
+ return new HFileScanner() {
+ final HFileScanner delegate = s;
+
+ public ByteBuffer getKey() {
+ return delegate.getKey();
+ }
+
+ public String getKeyString() {
+ return delegate.getKeyString();
+ }
+
+ public ByteBuffer getValue() {
+ return delegate.getValue();
+ }
+
+ public String getValueString() {
+ return delegate.getValueString();
+ }
+
+ public boolean next() throws IOException {
+ boolean b = delegate.next();
+ if (!b) {
+ return b;
+ }
+ if (!top) {
+ ByteBuffer bb = getKey();
+ if (getComparator().compare(bb.array(), bb.arrayOffset(), bb.limit(),
+ splitkey, 0, splitkey.length) >= 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public boolean seekBefore(byte[] key) throws IOException {
+ if (top) {
+ if (getComparator().compare(key, splitkey) < 0) {
+ return false;
+ }
+ } else {
+ if (getComparator().compare(key, splitkey) >= 0) {
+ return seekBefore(splitkey);
+ }
+ }
+ return this.delegate.seekBefore(key);
+ }
+
+ public boolean seekTo() throws IOException {
+ if (top) {
+ int r = this.delegate.seekTo(splitkey);
+ if (r < 0) {
+ // midkey is < first key in file
+ return this.delegate.seekTo();
+ }
+ if (r > 0) {
+ return this.delegate.next();
+ }
+ return true;
+ }
+
+ boolean b = delegate.seekTo();
+ if (!b) {
+ return b;
+ }
+ // Check key.
+ ByteBuffer k = this.delegate.getKey();
+ return this.delegate.getReader().getComparator().
+ compare(k.array(), k.arrayOffset(), k.limit(),
+ splitkey, 0, splitkey.length) < 0;
+ }
+
+ public int seekTo(byte[] key) throws IOException {
+ if (top) {
+ if (getComparator().compare(key, splitkey) < 0) {
+ return -1;
+ }
+ } else {
+ if (getComparator().compare(key, splitkey) >= 0) {
+ // we would place the scanner in the second half.
+ // it might be an error to return false here ever...
+ boolean res = delegate.seekBefore(splitkey);
+ if (!res) {
+ throw new IOException("Seeking for a key in bottom of file, but key exists in top of file, failed on seekBefore(midkey)");
+ }
+ return 1;
+ }
+ }
+ return delegate.seekTo(key);
+ }
+
+ public Reader getReader() {
+ return this.delegate.getReader();
+ }
+
+ public boolean isSeeked() {
+ return this.delegate.isSeeked();
+ }
+ };
+ }
+
+ public byte[] getLastKey() {
+ if (top) {
+ return super.getLastKey();
+ } else {
+ HFileScanner scanner = getScanner();
+ try {
+ if (scanner.seekBefore(this.splitkey)) {
+ return Bytes.toBytes(scanner.getKey());
+ }
+ } catch (IOException e) {
+ LOG.warn("Failed seekBefore " + Bytes.toString(this.splitkey), e);
+ }
+ return null;
+ }
+ }
+
+ public byte[] midkey() throws IOException {
+ // Returns null to indicate file is not splittable.
+ return null;
+ }
+}
\ No newline at end of file
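Editor's note: a minimal usage sketch, not part of the patch, showing how the new half-file reader might be driven. The paths, the reference file name, and the assumption that HalfHFileReader follows HFile.Reader's loadFileInfo()-before-use contract (shown later in this patch) are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.HalfHFileReader;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;

public class HalfHFileReaderSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // Hypothetical locations: the parent store file and the reference written
    // for one daughter region at split time.
    Path parentStoreFile = new Path("/hbase/demo/parent/family/1278437856009925445");
    Reference r = Reference.read(fs,
      new Path("/hbase/demo/daughter/family/1278437856009925445.3323223323"));
    HalfHFileReader half =
      new HalfHFileReader(fs, parentStoreFile, null /* no block cache */, r);
    half.loadFileInfo();                 // inherited HFile.Reader setup
    HFileScanner scanner = half.getScanner();
    if (scanner.seekTo()) {              // positions inside the served half only
      do {
        System.out.println(scanner.getKeyString());
      } while (scanner.next());          // next() stops at the split key for 'bottom'
    }
    half.close();
  }
}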
diff --git a/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java b/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java
index e1d36c9..d94536b 100644
--- a/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java
+++ b/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java
@@ -90,7 +90,6 @@ public class HalfMapFileReader extends BloomFilterMapFile.Reader {
// have an actual midkey themselves. No midkey is how we indicate file is
// not splittable.
this.midkey = new HStoreKey((HStoreKey)mk);
- this.midkey.setHRegionInfo(hri);
// Is it top or bottom half?
this.top = Reference.isTopFileRegion(r);
}
@@ -212,4 +211,4 @@ public class HalfMapFileReader extends BloomFilterMapFile.Reader {
checkKey(key);
return super.seek(key);
}
-}
+}
\ No newline at end of file
diff --git a/src/java/org/apache/hadoop/hbase/io/HbaseMapWritable.java b/src/java/org/apache/hadoop/hbase/io/HbaseMapWritable.java
index 951b8f1..e119490 100644
--- a/src/java/org/apache/hadoop/hbase/io/HbaseMapWritable.java
+++ b/src/java/org/apache/hadoop/hbase/io/HbaseMapWritable.java
@@ -45,7 +45,7 @@ import org.apache.hadoop.util.ReflectionUtils;
* if passed a value type that it has not already been told about. Its been
* primed with hbase Writables and byte []. Keys are always byte arrays.
*
- * @param key TODO: Parameter K is never used, could be removed.
+ * @param key TODO: Parameter K is never used, could be removed.
* @param value Expects a Writable or byte [].
*/
public class HbaseMapWritable
@@ -191,7 +191,7 @@ implements SortedMap, Writable, Configurable {
// Then write out each key/value pair
for (Map.Entry e: instance.entrySet()) {
Bytes.writeByteArray(out, e.getKey());
- Byte id =getId(e.getValue().getClass());
+ Byte id = getId(e.getValue().getClass());
out.writeByte(id);
Object value = e.getValue();
if (value instanceof byte []) {
diff --git a/src/java/org/apache/hadoop/hbase/io/MapFile.java b/src/java/org/apache/hadoop/hbase/io/MapFile.java
index 49e9262..e8b79d5 100644
--- a/src/java/org/apache/hadoop/hbase/io/MapFile.java
+++ b/src/java/org/apache/hadoop/hbase/io/MapFile.java
@@ -171,11 +171,13 @@ public class MapFile {
CompressionType.BLOCK, progress);
}
- /** The number of entries that are added before an index entry is added.*/
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileWriter#getIndexInterval()
+ */
public int getIndexInterval() { return indexInterval; }
- /** Sets the index interval.
- * @see #getIndexInterval()
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileWriter#setIndexInterval(int)
*/
public void setIndexInterval(int interval) { indexInterval = interval; }
@@ -186,14 +188,17 @@ public class MapFile {
conf.setInt(INDEX_INTERVAL, interval);
}
- /** Close the map. */
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileWriter#close()
+ */
public synchronized void close() throws IOException {
data.close();
index.close();
}
- /** Append a key/value pair to the map. The key must be greater or equal
- * to the previous key added to the map. */
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileWriter#append(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable)
+ */
public synchronized void append(WritableComparable key, Writable val)
throws IOException {
@@ -250,10 +255,14 @@ public class MapFile {
private WritableComparable[] keys;
private long[] positions;
- /** Returns the class of keys in this file. */
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#getKeyClass()
+ */
public Class<?> getKeyClass() { return data.getKeyClass(); }
- /** Returns the class of values in this file. */
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#getValueClass()
+ */
public Class<?> getValueClass() { return data.getValueClass(); }
/** Construct a map reader for the named map.*/
@@ -362,14 +371,15 @@ public class MapFile {
}
}
- /** Re-positions the reader before its first key. */
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#reset()
+ */
public synchronized void reset() throws IOException {
data.seek(firstPosition);
}
- /** Get the key at approximately the middle of the file.
- *
- * @throws IOException
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#midKey()
*/
public synchronized WritableComparable midKey() throws IOException {
@@ -382,9 +392,8 @@ public class MapFile {
return keys[pos];
}
- /** Reads the final key from the file.
- *
- * @param key key to read into
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#finalKey(org.apache.hadoop.io.WritableComparable)
*/
public synchronized void finalKey(WritableComparable key)
throws IOException {
@@ -404,9 +413,8 @@ public class MapFile {
}
}
- /** Positions the reader at the named key, or if none such exists, at the
- * first entry after the named key. Returns true iff the named key exists
- * in this map.
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#seek(org.apache.hadoop.io.WritableComparable)
*/
public synchronized boolean seek(WritableComparable key) throws IOException {
return seekInternal(key) == 0;
@@ -517,15 +525,17 @@ public class MapFile {
return -(low + 1); // key not found.
}
- /** Read the next key/value pair in the map into key and
- * val. Returns true if such a pair exists and false when at
- * the end of the map */
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#next(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable)
+ */
public synchronized boolean next(WritableComparable key, Writable val)
throws IOException {
return data.next(key, val);
}
- /** Return the value for the named key, or null if none exists. */
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#get(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable)
+ */
public synchronized Writable get(WritableComparable key, Writable val)
throws IOException {
if (seek(key)) {
@@ -535,14 +545,8 @@ public class MapFile {
return null;
}
- /**
- * Finds the record that is the closest match to the specified key.
- * Returns key or if it does not exist, at the first entry
- * after the named key.
- *
- * @param key - key that we're trying to find
- * @param val - data value if key is found
- * @return - the key that was the closest match or null if eof.
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#getClosest(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable)
*/
public synchronized WritableComparable getClosest(WritableComparable key,
Writable val)
@@ -550,15 +554,8 @@ public class MapFile {
return getClosest(key, val, false);
}
- /**
- * Finds the record that is the closest match to the specified key.
- *
- * @param key - key that we're trying to find
- * @param val - data value if key is found
- * @param before - IF true, and key does not exist, return
- * the first entry that falls just before the key. Otherwise,
- * return the record that sorts just after.
- * @return - the key that was the closest match or null if eof.
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#getClosest(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable, boolean)
*/
public synchronized WritableComparable getClosest(WritableComparable key,
Writable val, final boolean before)
@@ -578,7 +575,9 @@ public class MapFile {
return nextKey;
}
- /** Close the map. */
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hbase.io.StoreFileReader#close()
+ */
public synchronized void close() throws IOException {
if (!indexClosed) {
index.close();
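Editor's note: the getClosest() contract referenced above is easiest to see with a tiny example. This sketch is not part of the patch; it uses the stock org.apache.hadoop.io.MapFile this class was copied from (its Reader/Writer constructors may differ slightly from the copy), and the path and keys are made up.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class GetClosestSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    String dir = "/tmp/getclosest-demo";

    MapFile.Writer w = new MapFile.Writer(conf, fs, dir, Text.class, Text.class);
    w.append(new Text("apple"), new Text("1"));
    w.append(new Text("cherry"), new Text("2"));
    w.close();

    MapFile.Reader r = new MapFile.Reader(fs, dir, conf);
    Text val = new Text();
    // "banana" is absent, so getClosest returns the first entry sorting after it.
    System.out.println(r.getClosest(new Text("banana"), val) + " = " + val);       // cherry = 2
    // With before=true the entry just before the missing key comes back instead.
    System.out.println(r.getClosest(new Text("banana"), val, true) + " = " + val); // apple = 1
    r.close();
  }
}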
diff --git a/src/java/org/apache/hadoop/hbase/io/Reference.java b/src/java/org/apache/hadoop/hbase/io/Reference.java
index 71fbec0..deebf48 100644
--- a/src/java/org/apache/hadoop/hbase/io/Reference.java
+++ b/src/java/org/apache/hadoop/hbase/io/Reference.java
@@ -7,41 +7,34 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;
/**
- * A reference to a part of a store file. The file referenced usually lives
- * under a different region. The part referenced is usually the top or bottom
- * half of the file. References are made at region split time. Being lazy
- * about copying data between the parent of the split and the split daughters
- * makes splitting faster.
+ * A reference to the top or bottom half of a store file. The file referenced
+ * lives under a different region. References are made at region split time.
*
- * References work with {@link HalfMapFileReader}. References know how to
- * write out the reference format in the file system and are whats juggled when
- * references are mixed in with direct store files. The
- * {@link HalfMapFileReader} is used reading the referred to file.
+ *
+ * <p>References work with a special half store file type. References know how
+ * to write out the reference format in the file system and are what's juggled
+ * when references are mixed in with direct store files. The half store file
+ * type is used for reading the referred-to file.
*
*
* <p>References to store files located over in some other region look like
* this in the file system
- * 1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184:
- * i.e. an id followed by the name of the referenced region. The data
- * ('mapfiles') of references are empty. The accompanying info file
- * contains the midkey that demarks top and bottom of the
- * referenced storefile, the id of the remote store we're referencing and
- * whether we're to serve the top or bottom region of the remote store file.
+ * 1278437856009925445.3323223323:
+ * i.e. an id followed by hash of the referenced region.
* Note, a region is itself not splitable if it has instances of store file
* references. References are cleaned up by compactions.
*/
public class Reference implements Writable {
- // TODO: see if it makes sense making a ReferenceMapFile whose Writer is this
- // class and whose Reader is the {@link HalfMapFileReader}.
-
- private int encodedRegionName;
- private long fileid;
+ private byte [] splitkey;
private Range region;
- private HStoreKey midkey;
-
+
/**
* For split HStoreFiles, it specifies if the file covers the lower half or
* the upper half of the key range
@@ -52,66 +45,86 @@ public class Reference implements Writable {
/** HStoreFile contains lower half of key range */
bottom
}
-
- public Reference(final int ern, final long fid, final HStoreKey m,
- final Range fr) {
- this.encodedRegionName = ern;
- this.fileid = fid;
+
+ /**
+ * Constructor
+ * @param s This is a serialized store key with the row we are to split on,
+ * an empty column and a timestamp of the LATEST_TIMESTAMP. This is the first
+ * possible entry in a row. This is what we are splitting around.
+ * @param fr
+ */
+ public Reference(final byte [] s, final Range fr) {
+ this.splitkey = s;
this.region = fr;
- this.midkey = m;
- }
-
- public Reference() {
- this(-1, -1, null, Range.bottom);
}
- public long getFileId() {
- return fileid;
+ /**
+ * Used by serializations.
+ */
+ public Reference() {
+ this(null, Range.bottom);
}
public Range getFileRegion() {
- return region;
- }
-
- public HStoreKey getMidkey() {
- return midkey;
+ return this.region;
}
-
- public int getEncodedRegionName() {
- return this.encodedRegionName;
+
+ public byte [] getSplitKey() {
+ return splitkey;
}
- @Override
public String toString() {
- return encodedRegionName + "/" + fileid + "/" + region;
+ return "" + this.region;
}
// Make it serializable.
public void write(DataOutput out) throws IOException {
- // Write out the encoded region name as a String. Doing it as a String
- // keeps a Reference's serialization backword compatible with
- // pre-HBASE-82 serializations. ALternative is rewriting all
- // info files in hbase (Serialized References are written into the
- // 'info' file that accompanies HBase Store files).
- out.writeUTF(Integer.toString(encodedRegionName));
- out.writeLong(fileid);
// Write true if we're doing top of the file.
- out.writeBoolean(isTopFileRegion(region));
- this.midkey.write(out);
+ out.writeBoolean(isTopFileRegion(this.region));
+ Bytes.writeByteArray(out, this.splitkey);
}
public void readFields(DataInput in) throws IOException {
- this.encodedRegionName = Integer.parseInt(in.readUTF());
- fileid = in.readLong();
boolean tmp = in.readBoolean();
// If true, set region to top.
- region = tmp? Range.top: Range.bottom;
- midkey = new HStoreKey();
- midkey.readFields(in);
+ this.region = tmp? Range.top: Range.bottom;
+ this.splitkey = Bytes.readByteArray(in);
}
-
+
public static boolean isTopFileRegion(final Range r) {
return r.equals(Range.top);
}
+
+ public Path write(final FileSystem fs, final Path p)
+ throws IOException {
+ FSUtils.create(fs, p);
+ FSDataOutputStream out = fs.create(p);
+ try {
+ write(out);
+ } finally {
+ out.close();
+ }
+ return p;
+ }
+
+ /**
+ * Read a Reference from FileSystem.
+ * @param fs
+ * @param p
+ * @return New Reference made from passed p
+ * @throws IOException
+ */
+ public static Reference read(final FileSystem fs, final Path p)
+ throws IOException {
+ FSDataInputStream in = fs.open(p);
+ try {
+ Reference r = new Reference();
+ r.readFields(in);
+ return r;
+ } finally {
+ in.close();
+ }
+ }
}
\ No newline at end of file
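Editor's note: a small round-trip sketch, not part of the patch, for the reworked Reference serialization. The split row and file name are hypothetical, and a real split key would be a serialized store key rather than a bare row.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.util.Bytes;

public class ReferenceRoundTrip {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    byte [] splitKey = Bytes.toBytes("splitrow");   // stand-in for a serialized key
    Reference top = new Reference(splitKey, Reference.Range.top);

    Path p = new Path("/tmp/reference-demo/1278437856009925445.3323223323");
    top.write(fs, p);                     // persists range + split key via write(DataOutput)

    Reference copy = Reference.read(fs, p);
    System.out.println("top half? "
      + Reference.isTopFileRegion(copy.getFileRegion())
      + ", split key: " + Bytes.toString(copy.getSplitKey()));
  }
}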
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/src/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
new file mode 100644
index 0000000..91cfaf6
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Block cache interface.
+ * TODO: Add filename or hash of filename to block cache key.
+ */
+public interface BlockCache {
+ /**
+ * Add block to cache.
+ * @param blockName Name of the block (used as the cache key).
+ * @param buf The block contents wrapped in a ByteBuffer.
+ */
+ public void cacheBlock(String blockName, ByteBuffer buf);
+
+ /**
+ * Fetch block from cache.
+ * @param blockName Name of block to fetch.
+ * @return Block or null if block is not in the cache.
+ */
+ public ByteBuffer getBlock(String blockName);
+}
\ No newline at end of file
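Editor's note: the interface is small enough that a throwaway implementation makes the contract concrete. This map-backed cache is a sketch only, not part of the patch; a real cache would bound its size and evict.

import java.nio.ByteBuffer;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.hbase.io.hfile.BlockCache;

public class SimpleBlockCache implements BlockCache {
  private final Map<String, ByteBuffer> blocks =
    new ConcurrentHashMap<String, ByteBuffer>();

  public void cacheBlock(String blockName, ByteBuffer buf) {
    // Store a duplicate so later reads by callers do not disturb our copy's
    // position/limit (duplicate() shares the bytes, not the cursor).
    blocks.put(blockName, buf.duplicate());
  }

  public ByteBuffer getBlock(String blockName) {
    ByteBuffer b = blocks.get(blockName);
    return b == null ? null : b.duplicate();
  }
}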
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/BoundedRangeFileInputStream.java b/src/java/org/apache/hadoop/hbase/io/hfile/BoundedRangeFileInputStream.java
new file mode 100644
index 0000000..ae7734a
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/BoundedRangeFileInputStream.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+/**
+ * BoundedRangeFileInputStream abstracts a contiguous region of a Hadoop
+ * FSDataInputStream as a regular input stream. One can create multiple
+ * BoundedRangeFileInputStreams on top of the same FSDataInputStream and they
+ * would not interfere with each other.
+ * Copied from hadoop-3315 tfile.
+ */
+class BoundedRangeFileInputStream extends InputStream {
+
+ private FSDataInputStream in;
+ private long pos;
+ private long end;
+ private long mark;
+ private final byte[] oneByte = new byte[1];
+
+ /**
+ * Constructor
+ *
+ * @param in
+ * The FSDataInputStream we connect to.
+ * @param offset
+ * Beginning offset of the region.
+ * @param length
+ * Length of the region.
+ *
+ * The actual length of the region may be smaller if (off_begin +
+ * length) goes beyond the end of FS input stream.
+ */
+ public BoundedRangeFileInputStream(FSDataInputStream in, long offset,
+ long length) {
+ if (offset < 0 || length < 0) {
+ throw new IndexOutOfBoundsException("Invalid offset/length: " + offset
+ + "/" + length);
+ }
+
+ this.in = in;
+ this.pos = offset;
+ this.end = offset + length;
+ this.mark = -1;
+ }
+
+ @Override
+ public int available() throws IOException {
+ int avail = in.available();
+ if (pos + avail > end) {
+ avail = (int) (end - pos);
+ }
+
+ return avail;
+ }
+
+ @Override
+ public int read() throws IOException {
+ int ret = read(oneByte);
+ if (ret == 1) return oneByte[0] & 0xff;
+ return -1;
+ }
+
+ @Override
+ public int read(byte[] b) throws IOException {
+ return read(b, 0, b.length);
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
+ throw new IndexOutOfBoundsException();
+ }
+
+ int n = (int) Math.min(Integer.MAX_VALUE, Math.min(len, (end - pos)));
+ if (n == 0) return -1;
+ int ret = 0;
+ synchronized (in) {
+ in.seek(pos);
+ ret = in.read(b, off, n);
+ }
+ // / ret = in.read(pos, b, off, n);
+ if (ret < 0) {
+ end = pos;
+ return -1;
+ }
+ pos += ret;
+ return ret;
+ }
+
+ @Override
+ /*
+ * We may skip beyond the end of the file.
+ */
+ public long skip(long n) throws IOException {
+ long len = Math.min(n, end - pos);
+ pos += len;
+ return len;
+ }
+
+ @Override
+ public void mark(int readlimit) {
+ mark = pos;
+ }
+
+ @Override
+ public void reset() throws IOException {
+ if (mark < 0) throw new IOException("Resetting to invalid mark");
+ pos = mark;
+ }
+
+ @Override
+ public boolean markSupported() {
+ return true;
+ }
+
+ @Override
+ public void close() {
+ // Invalidate the state of the stream.
+ in = null;
+ pos = end;
+ mark = -1;
+ }
+}
\ No newline at end of file
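Editor's note: a usage sketch, not part of the patch. The class is package-private, so the sketch sits in the same package; the path, offset, and length are made up.

package org.apache.hadoop.hbase.io.hfile;

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BoundedRangeSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    FSDataInputStream fsdis = fs.open(new Path("/tmp/somefile"));
    // Expose bytes [1024, 1024 + 4096) as an ordinary InputStream; other
    // BoundedRangeFileInputStreams over fsdis will not interfere because each
    // read seeks to its own position inside a synchronized block.
    InputStream block = new BoundedRangeFileInputStream(fsdis, 1024L, 4096L);
    byte [] buf = new byte[512];
    int n;
    while ((n = block.read(buf)) != -1) {
      System.out.println("read " + n + " bytes");
    }
    block.close();   // invalidates the wrapper only; fsdis stays open
    fsdis.close();
  }
}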
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/Compression.java b/src/java/org/apache/hadoop/hbase/io/hfile/Compression.java
new file mode 100644
index 0000000..249bc42
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/Compression.java
@@ -0,0 +1,324 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.compress.CodecPool;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionInputStream;
+import org.apache.hadoop.io.compress.CompressionOutputStream;
+import org.apache.hadoop.io.compress.Compressor;
+import org.apache.hadoop.io.compress.Decompressor;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.io.compress.LzoCodec;
+
+/**
+ * Compression related stuff.
+ * Copied from hadoop-3315 tfile.
+ */
+public final class Compression {
+ static final Log LOG = LogFactory.getLog(Compression.class);
+
+ /**
+ * Prevent the instantiation of class.
+ */
+ private Compression() {
+ super();
+ }
+
+ static class FinishOnFlushCompressionStream extends FilterOutputStream {
+ public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
+ super(cout);
+ }
+
+ @Override
+ public void write(byte b[], int off, int len) throws IOException {
+ out.write(b, off, len);
+ }
+
+ @Override
+ public void flush() throws IOException {
+ CompressionOutputStream cout = (CompressionOutputStream) out;
+ cout.finish();
+ cout.flush();
+ cout.resetState();
+ }
+ }
+
+ /**
+ * Compression algorithms.
+ */
+ public static enum Algorithm {
+ LZO("lzo") {
+ private LzoCodec codec;
+
+ @Override
+ CompressionCodec getCodec() {
+ if (codec == null) {
+ Configuration conf = new Configuration();
+ conf.setBoolean("hadoop.native.lib", true);
+ codec = new LzoCodec();
+ codec.setConf(conf);
+ }
+
+ return codec;
+ }
+
+ @Override
+ public synchronized InputStream createDecompressionStream(
+ InputStream downStream, Decompressor decompressor,
+ int downStreamBufferSize) throws IOException {
+ InputStream bis1 = null;
+ if (downStreamBufferSize > 0) {
+ bis1 = new BufferedInputStream(downStream, downStreamBufferSize);
+ }
+ else {
+ bis1 = downStream;
+ }
+ codec.getConf()
+ .setInt("io.compression.codec.lzo.buffersize", 64 * 1024);
+ CompressionInputStream cis =
+ codec.createInputStream(bis1, decompressor);
+ BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
+ return bis2;
+ }
+
+ @Override
+ public synchronized OutputStream createCompressionStream(
+ OutputStream downStream, Compressor compressor,
+ int downStreamBufferSize) throws IOException {
+ OutputStream bos1 = null;
+ if (downStreamBufferSize > 0) {
+ bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
+ }
+ else {
+ bos1 = downStream;
+ }
+ codec.getConf()
+ .setInt("io.compression.codec.lzo.buffersize", 64 * 1024);
+ CompressionOutputStream cos =
+ codec.createOutputStream(bos1, compressor);
+ BufferedOutputStream bos2 =
+ new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
+ DATA_OBUF_SIZE);
+ return bos2;
+ }
+ },
+
+ GZ("gz") {
+ private GzipCodec codec;
+
+ @Override
+ CompressionCodec getCodec() {
+ if (codec == null) {
+ Configuration conf = new Configuration();
+ conf.setBoolean("hadoop.native.lib", true);
+ codec = new GzipCodec();
+ codec.setConf(conf);
+ }
+
+ return codec;
+ }
+
+ @Override
+ public synchronized InputStream createDecompressionStream(
+ InputStream downStream, Decompressor decompressor,
+ int downStreamBufferSize) throws IOException {
+ // Set the internal buffer size to read from down stream.
+ if (downStreamBufferSize > 0) {
+ codec.getConf().setInt("io.file.buffer.size", downStreamBufferSize);
+ }
+ CompressionInputStream cis =
+ codec.createInputStream(downStream, decompressor);
+ BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
+ return bis2;
+ }
+
+ @Override
+ public synchronized OutputStream createCompressionStream(
+ OutputStream downStream, Compressor compressor,
+ int downStreamBufferSize) throws IOException {
+ OutputStream bos1 = null;
+ if (downStreamBufferSize > 0) {
+ bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
+ }
+ else {
+ bos1 = downStream;
+ }
+ codec.getConf().setInt("io.file.buffer.size", 32 * 1024);
+ CompressionOutputStream cos =
+ codec.createOutputStream(bos1, compressor);
+ BufferedOutputStream bos2 =
+ new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
+ DATA_OBUF_SIZE);
+ return bos2;
+ }
+ },
+
+ NONE("none") {
+ @Override
+ CompressionCodec getCodec() {
+ return null;
+ }
+
+ @Override
+ public synchronized InputStream createDecompressionStream(
+ InputStream downStream, Decompressor decompressor,
+ int downStreamBufferSize) throws IOException {
+ if (downStreamBufferSize > 0) {
+ return new BufferedInputStream(downStream, downStreamBufferSize);
+ }
+ // else {
+ // Make sure we bypass FSInputChecker buffer.
+ // return new BufferedInputStream(downStream, 1024);
+ // }
+ // }
+ return downStream;
+ }
+
+ @Override
+ public synchronized OutputStream createCompressionStream(
+ OutputStream downStream, Compressor compressor,
+ int downStreamBufferSize) throws IOException {
+ if (downStreamBufferSize > 0) {
+ return new BufferedOutputStream(downStream, downStreamBufferSize);
+ }
+
+ return downStream;
+ }
+ };
+
+ private final String compressName;
+ // data input buffer size to absorb small reads from application.
+ private static final int DATA_IBUF_SIZE = 1 * 1024;
+ // data output buffer size to absorb small writes from application.
+ private static final int DATA_OBUF_SIZE = 4 * 1024;
+
+ Algorithm(String name) {
+ this.compressName = name;
+ }
+
+ abstract CompressionCodec getCodec();
+
+ public abstract InputStream createDecompressionStream(
+ InputStream downStream, Decompressor decompressor,
+ int downStreamBufferSize) throws IOException;
+
+ public abstract OutputStream createCompressionStream(
+ OutputStream downStream, Compressor compressor, int downStreamBufferSize)
+ throws IOException;
+
+ public Compressor getCompressor() {
+ CompressionCodec codec = getCodec();
+ if (codec != null) {
+ Compressor compressor = CodecPool.getCompressor(codec);
+ if (compressor != null) {
+ if (compressor.finished()) {
+ // Somebody returns the compressor to CodecPool but is still using
+ // it.
+ LOG
+ .warn("Compressor obtained from CodecPool is already finished()");
+ // throw new AssertionError(
+ // "Compressor obtained from CodecPool is already finished()");
+ }
+ else {
+ LOG.debug("Got a compressor: " + compressor.hashCode());
+ }
+ compressor.reset();
+ }
+ return compressor;
+ }
+ return null;
+ }
+
+ public void returnCompressor(Compressor compressor) {
+ if (compressor != null) {
+ LOG.debug("Return a compressor: " + compressor.hashCode());
+ CodecPool.returnCompressor(compressor);
+ }
+ }
+
+ public Decompressor getDecompressor() {
+ CompressionCodec codec = getCodec();
+ if (codec != null) {
+ Decompressor decompressor = CodecPool.getDecompressor(codec);
+ if (decompressor != null) {
+ if (decompressor.finished()) {
+ // Somebody returns the decompressor to CodecPool but is still using
+ // it.
+ LOG
+ .warn("Deompressor obtained from CodecPool is already finished()");
+ // throw new AssertionError(
+ // "Decompressor obtained from CodecPool is already finished()");
+ }
+ else {
+ LOG.debug("Got a decompressor: " + decompressor.hashCode());
+ }
+ decompressor.reset();
+ }
+ return decompressor;
+ }
+
+ return null;
+ }
+
+ public void returnDecompressor(Decompressor decompressor) {
+ if (decompressor != null) {
+ LOG.debug("Returned a decompressor: " + decompressor.hashCode());
+ CodecPool.returnDecompressor(decompressor);
+ }
+ }
+
+ public String getName() {
+ return compressName;
+ }
+ }
+
+ static Algorithm getCompressionAlgorithmByName(String compressName) {
+ Algorithm[] algos = Algorithm.class.getEnumConstants();
+
+ for (Algorithm a : algos) {
+ if (a.getName().equals(compressName)) {
+ return a;
+ }
+ }
+
+ throw new IllegalArgumentException(
+ "Unsupported compression algorithm name: " + compressName);
+ }
+
+ static String[] getSupportedAlgorithms() {
+ Algorithm[] algos = Algorithm.class.getEnumConstants();
+
+ String[] ret = new String[algos.length];
+ int i = 0;
+ for (Algorithm a : algos) {
+ ret[i++] = a.getName();
+ }
+
+ return ret;
+ }
+}
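Editor's note: a sketch, not part of the patch, of exercising the Algorithm enum directly, the same way HFile.Writer does further down: borrow a Compressor from the pool, wrap the output stream, and give the compressor back. getCompressionAlgorithmByName() is package-private, so this sits in the hfile package; the output path is made up.

package org.apache.hadoop.hbase.io.hfile;

import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.compress.Compressor;

public class CompressionSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    FSDataOutputStream fsdos = fs.create(new Path("/tmp/compression-demo.gz"));

    Compression.Algorithm algo = Compression.getCompressionAlgorithmByName("gz");
    Compressor compressor = algo.getCompressor();      // pooled; null for "none"
    OutputStream out = algo.createCompressionStream(fsdos, compressor, 0);
    out.write(Bytes.toBytes("some bytes worth compressing"));
    out.flush();      // FinishOnFlushCompressionStream finishes the gzip stream here
    algo.returnCompressor(compressor);
    fsdos.close();
  }
}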
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java
new file mode 100644
index 0000000..3a2915d
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -0,0 +1,1423 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.Closeable;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.HbaseMapWritable;
+import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.compress.Compressor;
+import org.apache.hadoop.io.compress.Decompressor;
+
+/**
+ * File format for hbase.
+ * A file of sorted key/value pairs. Both keys and values are byte arrays.
+ *
+ * The memory footprint of a HFile includes the following (below is taken from
+ * Hadoop-3315 tfile
+ * but applies also to HFile):
+ *
+ * - Some constant overhead of reading or writing a compressed block.
+ *
+ * - Each compressed block requires one compression/decompression codec for
+ * I/O.
+ *
+ * - Temporary space to buffer the key.
+ *
+ * - Temporary space to buffer the value.
+ *
+ * - HFile index, which is proportional to the total number of Data Blocks.
+ * The total amount of memory needed to hold the index can be estimated as
+ * (56+AvgKeySize)*NumBlocks.
+ *
+ * Suggestions on performance optimization.
+ *
+ * - Minimum block size. We recommend a setting of minimum block size between
+ * 8KB to 1MB for general usage. Larger block size is preferred if files are
+ * primarily for sequential access. However, it would lead to inefficient random
+ * access (because there are more data to decompress). Smaller blocks are good
+ * for random access, but require more memory to hold the block index, and may
+ * be slower to create (because we must flush the compressor stream at the
+ * conclusion of each data block, which leads to an FS I/O flush). Further, due
+ * to the internal caching in Compression codec, the smallest possible block
+ * size would be around 20KB-30KB.
+ *
+ * - The current implementation does not offer true multi-threading for
+ * reading. The implementation uses FSDataInputStream seek()+read(), which is
+ * shown to be much faster than positioned-read call in single thread mode.
+ * However, it also means that if multiple threads attempt to access the same
+ * HFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ * sequentially even if they access different DFS blocks (Reexamine! pread seems
+ * to be 10% faster than seek+read in my testing -- stack).
+ *
+ * - Compression codec. Use "none" if the data is not very compressible (by
+ * compressible, I mean a compression ratio at least 2:1). Generally, use "lzo"
+ * as the starting point for experimenting. "gz" offers slightly better
+ * compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
+ * decompress, compared to "lzo".
+ *
+ *
+ * For more on the background behind HFile, see HBASE-61.
+ *
+ * File is made of data blocks followed by meta data blocks (if any), a fileinfo
+ * block, data block index, meta data block index, and a fixed size trailer
+ * which records the offsets at which file changes content type.
+ *
+ * <data blocks><meta blocks><fileinfo><data index><meta index><trailer>
+ * Each block has a bit of magic at its start. Blocks are comprised of
+ * key/values. In data blocks, they are both byte arrays. Metadata blocks are
+ * a String key and a byte array value. An empty file looks like this:
+ * <fileinfo><trailer>. That is, there are no data nor meta
+ * blocks present.
+ *
+ * TODO: Bloomfilters. Need to add hadoop 0.20 first since it has bug fixes
+ * in the hadoop bf package.
+ * TODO: USE memcmp by default? Write the keys out in an order that allows
+ * using it -- reverse the timestamp.
+ * TODO: Add support for fast-gzip and for lzo.
+ * TODO: Do scanners need to be able to take a start and end row?
+ * TODO: Should BlockIndex know the name of its file? Should it have a Path
+ * that points at its file say for the case where an index lives apart from
+ * an HFile instance?
+ */
+public class HFile {
+ static final Log LOG = LogFactory.getLog(HFile.class);
+
+ /* These values are more or less arbitrary, and they are used as a
+ * form of check to make sure the file isn't completely corrupt.
+ */
+ final static byte [] DATABLOCKMAGIC =
+ {'D', 'A', 'T', 'A', 'B', 'L', 'K', 42 };
+ final static byte [] INDEXBLOCKMAGIC =
+ { 'I', 'D', 'X', 'B', 'L', 'K', 41, 43 };
+ final static byte [] METABLOCKMAGIC =
+ { 'M', 'E', 'T', 'A', 'B', 'L', 'K', 99 };
+ final static byte [] TRAILERBLOCKMAGIC =
+ { 'T', 'R', 'A', 'B', 'L', 'K', 34, 36 };
+
+ /**
+ * Maximum length of key in HFile.
+ */
+ public final static int MAXIMUM_KEY_LENGTH = 64 * 1024;
+
+ /**
+ * Default blocksize for hfile.
+ */
+ public final static int DEFAULT_BLOCKSIZE = 64 * 1024;
+
+ /**
+ * Default compression: none.
+ */
+ public final static String DEFAULT_COMPRESSION =
+ Compression.Algorithm.NONE.getName();
+
+ /**
+ * HFile Writer.
+ */
+ public static class Writer implements Closeable {
+ // FileSystem stream to write on.
+ private FSDataOutputStream outputStream;
+ // True if we opened the outputStream (and so will close it).
+ private boolean closeOutputStream;
+
+ // Name for this object used when logging or in toString. Is either
+ // the result of a toString on stream or else toString of passed file Path.
+ private String name;
+
+ // Total uncompressed bytes, maybe calculate a compression ratio later.
+ private int totalBytes = 0;
+
+ // Total # of key/value entries, ie: how many times add() was called.
+ private int entryCount = 0;
+
+ // Used calculating average key and value lengths.
+ private long keylength = 0;
+ private long valuelength = 0;
+
+ // Used to ensure we write in order.
+ private final RawComparator comparator;
+
+ // A stream made per block written.
+ private DataOutputStream out;
+
+ // Number of uncompressed bytes per block. Reinitialized when we start
+ // new block.
+ private int blocksize;
+
+ // Offset where the current block began.
+ private long blockBegin;
+
+ // First key in a block (Not first key in file).
+ private byte [] firstKey = null;
+
+ // Key previously appended. Becomes the last key in the file.
+ private byte [] lastKey = null;
+
+ // See {@link BlockIndex}. Below four fields are used to write the block
+ // index.
+ ArrayList blockKeys = new ArrayList();
+ // Block offset in backing stream.
+ ArrayList blockOffsets = new ArrayList();
+ // Raw (decompressed) data size.
+ ArrayList blockDataSizes = new ArrayList();
+
+ // Meta block system.
+ private ArrayList metaNames = new ArrayList();
+ private ArrayList metaData = new ArrayList();
+
+ // Used compression. Used even if no compression -- 'none'.
+ private final Compression.Algorithm compressAlgo;
+ private Compressor compressor;
+
+ // Special datastructure to hold fileinfo.
+ private FileInfo fileinfo = new FileInfo();
+
+ // May be null if we were passed a stream.
+ private Path path = null;
+
+ /**
+ * Constructor that uses all defaults for compression and block size.
+ * @param fs
+ * @param path
+ * @throws IOException
+ */
+ public Writer(FileSystem fs, Path path)
+ throws IOException {
+ this(fs, path, DEFAULT_BLOCKSIZE, null, null);
+ }
+
+ /**
+ * Constructor that takes a Path.
+ * @param fs
+ * @param path
+ * @param blocksize
+ * @param compress
+ * @param comparator
+ * @throws IOException
+ */
+ public Writer(FileSystem fs, Path path, int blocksize, String compress,
+ final RawComparator comparator)
+ throws IOException {
+ this(fs.create(path), blocksize, compress, comparator);
+ this.closeOutputStream = true;
+ this.name = path.toString();
+ this.path = path;
+ }
+
+ /**
+ * Constructor that takes a stream.
+ * @param ostream Stream to use.
+ * @param blocksize
+ * @param compress
+ * @param c
+ * @throws IOException
+ */
+ public Writer(final FSDataOutputStream ostream, final int blocksize,
+ final String compress, final RawComparator c)
+ throws IOException {
+ this.outputStream = ostream;
+ this.closeOutputStream = false;
+ this.blocksize = blocksize;
+ this.comparator = c == null? Bytes.BYTES_RAWCOMPARATOR: c;
+ this.name = this.outputStream.toString();
+ this.compressAlgo =
+ Compression.getCompressionAlgorithmByName(compress == null?
+ Compression.Algorithm.NONE.getName(): compress);
+ }
+
+ /*
+ * If at block boundary, opens new block.
+ * @throws IOException
+ */
+ private void checkBlockBoundary() throws IOException {
+ if (this.out != null && this.out.size() < blocksize) return;
+ finishBlock();
+ newBlock();
+ }
+
+ /*
+ * Do the cleanup if a current block.
+ * @throws IOException
+ */
+ private void finishBlock() throws IOException {
+ if (this.out == null) return;
+ long size = releaseCompressingStream(this.out);
+ this.out = null;
+
+ blockKeys.add(firstKey);
+ int written = longToInt(size);
+ blockOffsets.add(Long.valueOf(blockBegin));
+ blockDataSizes.add(Integer.valueOf(written));
+ this.totalBytes += written;
+ }
+
+ /*
+ * Ready a new block for writing.
+ * @throws IOException
+ */
+ private void newBlock() throws IOException {
+ // This is where the next block begins.
+ blockBegin = outputStream.getPos();
+ this.out = getCompressingStream();
+ this.out.write(DATABLOCKMAGIC);
+ firstKey = null;
+ }
+
+ /*
+ * Sets up a compressor and creates a compression stream on top of
+ * this.outputStream. Get one per block written.
+ * @return A compressing stream; if 'none' compression, returned stream
+ * does not compress.
+ * @throws IOException
+ * @see {@link #releaseCompressingStream(DataOutputStream)}
+ */
+ private DataOutputStream getCompressingStream() throws IOException {
+ this.compressor = compressAlgo.getCompressor();
+ // Get new DOS compression stream. In tfile, the DOS, is not closed,
+ // just finished, and that seems to be fine over there. TODO: Check
+ // no memory retention of the DOS. Should I disable the 'flush' on the
+ // DOS as the BCFile over in tfile does? It wants to make it so flushes
+ // don't go through to the underlying compressed stream. Flush on the
+ // compressed downstream should be only when done. I was going to but
+ // looks like when we call flush in here, its legitimate flush that
+ // should go through to the compressor.
+ OutputStream os =
+ this.compressAlgo.createCompressionStream(this.outputStream,
+ this.compressor, 0);
+ return new DataOutputStream(os);
+ }
+
+ /*
+ * Let go of block compressor and compressing stream gotten in call
+ * {@link #getCompressingStream}.
+ * @param dos
+ * @return How much was written on this stream since it was taken out.
+ * @see #getCompressingStream()
+ * @throws IOException
+ */
+ private int releaseCompressingStream(final DataOutputStream dos)
+ throws IOException {
+ dos.flush();
+ this.compressAlgo.returnCompressor(this.compressor);
+ this.compressor = null;
+ return dos.size();
+ }
+
+ /**
+ * Add a meta block to the end of the file. Call before close().
+ * Metadata blocks are expensive. Fill one with a bunch of serialized data
+ * rather than do a metadata block per metadata instance. If metadata is
+ * small, consider adding to file info using
+ * {@link #appendFileInfo(byte[], byte[])}
+ * @param metaBlockName name of the block
+ * @param bytes uninterpreted bytes of the block.
+ */
+ public void appendMetaBlock(String metaBlockName, byte [] bytes) {
+ metaNames.add(Bytes.toBytes(metaBlockName));
+ metaData.add(bytes);
+ }
+
+ /**
+ * Add to the file info. Added key value can be gotten out of the return
+ * from {@link Reader#loadFileInfo()}.
+ * @param k Key
+ * @param v Value
+ * @throws IOException
+ */
+ public void appendFileInfo(final byte [] k, final byte [] v)
+ throws IOException {
+ appendFileInfo(this.fileinfo, k, v, true);
+ }
+
+ FileInfo appendFileInfo(FileInfo fi, final byte [] k, final byte [] v,
+ final boolean checkPrefix)
+ throws IOException {
+ if (k == null || v == null) {
+ throw new NullPointerException("Neither key nor value may be null");
+ }
+ if (checkPrefix &&
+ Bytes.toString(k).toLowerCase().startsWith(FileInfo.RESERVED_PREFIX)) {
+ throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX +
+ " are reserved");
+ }
+ fi.put(k, v);
+ return fi;
+ }
+
+ /**
+ * @return Path or null if we were passed a stream rather than a Path.
+ */
+ public Path getPath() {
+ return this.path;
+ }
+
+ public String toString() {
+ return "writer=" + this.name + ", compression=" +
+ this.compressAlgo.getName();
+ }
+
+ /**
+ * Add key/value to file.
+ * Keys must be added in an order that agrees with the RawComparator passed
+ * on construction.
+ * @param key Key to add. Cannot be empty nor null.
+ * @param value Value to add. Cannot be empty nor null.
+ * @throws IOException
+ */
+ public void append(final byte [] key, final byte [] value)
+ throws IOException {
+ checkKey(key);
+ checkValue(value);
+ checkBlockBoundary();
+ // Write length of key and value and then actual key and value bytes.
+ this.out.writeInt(key.length);
+ this.keylength += key.length;
+ this.out.writeInt(value.length);
+ this.valuelength += value.length;
+ this.out.write(key);
+ this.out.write(value);
+ // Are we the first key in this block?
+ if (this.firstKey == null) this.firstKey = key;
+ this.lastKey = key;
+ this.entryCount ++;
+ }
+
+ /*
+ * @param key Key to check.
+ * @throws IOException
+ */
+ private void checkKey(final byte [] key) throws IOException {
+ if (key == null || key.length <= 0) {
+ throw new IOException("Key cannot be null or empty");
+ }
+ if (key.length > MAXIMUM_KEY_LENGTH) {
+ throw new IOException("Key length " + key.length + " > " +
+ MAXIMUM_KEY_LENGTH);
+ }
+ if (this.lastKey != null) {
+ if (this.comparator.compare(this.lastKey, key) > 0) {
+ throw new IOException("Added a key not lexically larger than" +
+ " previous: key=" + Bytes.toString(key) + ", lastkey=" +
+ Bytes.toString(lastKey));
+ }
+ }
+ }
+
+ private void checkValue(final byte [] value) throws IOException {
+ if (value == null || value.length <= 0) {
+ throw new IOException("Value cannot be null or empty");
+ }
+ }
+
+ public void close() throws IOException {
+ if (this.outputStream == null) {
+ return;
+ }
+ // Write out the end of the data blocks, then write meta data blocks.
+ // followed by fileinfo, data block index and meta block index.
+
+ finishBlock();
+
+ FixedFileTrailer trailer = new FixedFileTrailer();
+
+ // Write out the metadata blocks if any.
+ ArrayList metaOffsets = null;
+ ArrayList metaDataSizes = null;
+ if (metaNames.size() > 0) {
+ metaOffsets = new ArrayList(metaNames.size());
+ metaDataSizes = new ArrayList(metaNames.size());
+ for (int i = 0 ; i < metaNames.size() ; ++ i ) {
+ metaOffsets.add(Long.valueOf(outputStream.getPos()));
+ metaDataSizes.
+ add(Integer.valueOf(METABLOCKMAGIC.length + metaData.get(i).length));
+ writeMetaBlock(metaData.get(i));
+ }
+ }
+
+ // Write fileinfo.
+ trailer.fileinfoOffset = writeFileInfo(this.outputStream);
+
+ // Write the data block index.
+ trailer.dataIndexOffset = BlockIndex.writeIndex(this.outputStream,
+ this.blockKeys, this.blockOffsets, this.blockDataSizes);
+
+ // Meta block index.
+ if (metaNames.size() > 0) {
+ trailer.metaIndexOffset = BlockIndex.writeIndex(this.outputStream,
+ this.metaNames, metaOffsets, metaDataSizes);
+ }
+
+ // Now finish off the trailer.
+ trailer.dataIndexCount = blockKeys.size();
+ trailer.metaIndexCount = metaNames.size();
+
+ trailer.totalUncompressedBytes = totalBytes;
+ trailer.entryCount = entryCount;
+
+ trailer.compressionCodec = this.compressAlgo.ordinal();
+
+ trailer.serialize(outputStream);
+
+ if (this.closeOutputStream) {
+ this.outputStream.close();
+ this.outputStream = null;
+ }
+ }
+
+ /* Write a metadata block.
+ * @param metadata
+ * @throws IOException
+ */
+ private void writeMetaBlock(final byte [] b) throws IOException {
+ DataOutputStream dos = getCompressingStream();
+ dos.write(METABLOCKMAGIC);
+ dos.write(b);
+ releaseCompressingStream(dos);
+ }
+
+ /*
+ * Add last bits of metadata to fileinfo and then write it out.
+ * Reader will be expecting to find all below.
+ * @param o Stream to write on.
+ * @return Position at which we started writing.
+ * @throws IOException
+ */
+ private long writeFileInfo(FSDataOutputStream o) throws IOException {
+ if (this.lastKey != null) {
+ appendFileInfo(this.fileinfo, FileInfo.LASTKEY, this.lastKey, false);
+ }
+ int avgKeyLen = this.entryCount == 0? 0:
+ (int)(this.keylength/this.entryCount);
+ appendFileInfo(this.fileinfo, FileInfo.AVG_KEY_LEN,
+ Bytes.toBytes(avgKeyLen), false);
+ int avgValueLen = this.entryCount == 0? 0:
+ (int)(this.valuelength/this.entryCount);
+ appendFileInfo(this.fileinfo, FileInfo.AVG_VALUE_LEN,
+ Bytes.toBytes(avgValueLen), false);
+ appendFileInfo(this.fileinfo, FileInfo.COMPARATOR,
+ Bytes.toBytes(this.comparator.getClass().getName()), false);
+ long pos = o.getPos();
+ this.fileinfo.write(o);
+ return pos;
+ }
+ }
+
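Editor's note: a write-side sketch, not part of the patch, using the all-defaults constructor above (64KB blocks, no compression, byte-order comparator). Keys must arrive pre-sorted; the path and file-info key are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.Bytes;

public class HFileWriteSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    HFile.Writer writer = new HFile.Writer(fs, new Path("/tmp/hfile-demo"));
    // append() enforces sorted order and rejects null or empty keys and values.
    writer.append(Bytes.toBytes("row1"), Bytes.toBytes("value1"));
    writer.append(Bytes.toBytes("row2"), Bytes.toBytes("value2"));
    writer.appendFileInfo(Bytes.toBytes("demo.note"), Bytes.toBytes("sketch"));
    writer.close();   // flushes the last block, then writes fileinfo, indices and trailer
  }
}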
+ /**
+ * HFile Reader.
+ */
+ public static class Reader implements Closeable {
+ // Stream to read from.
+ private FSDataInputStream istream;
+ // True if we should close istream when done. We don't close it if we
+ // didn't open it.
+ private boolean closeIStream;
+
+ // These are read in when the file info is loaded.
+ HFile.BlockIndex blockIndex;
+ private BlockIndex metaIndex;
+ FixedFileTrailer trailer;
+ private volatile boolean fileInfoLoaded = false;
+
+ // Filled when we read in the trailer.
+ private Compression.Algorithm compressAlgo;
+
+ // Last key in the file. Filled in when we read in the file info
+ private byte [] lastkey = null;
+ // Stats read in when we load file info.
+ private int avgKeyLen = -1;
+ private int avgValueLen = -1;
+
+ // Used to ensure we seek correctly.
+ RawComparator comparator;
+
+ // Size of this file.
+ private final long fileSize;
+
+ // Block cache to use.
+ private final BlockCache cache;
+ public int cacheHits = 0;
+ public int blockLoads = 0;
+
+ // Name for this object used when logging or in toString. Is either
+ // the result of a toString on the stream or else is toString of passed
+ // file Path plus metadata key/value pairs.
+ private String name;
+
+ /*
+ * Do not expose the default constructor.
+ */
+ @SuppressWarnings("unused")
+ private Reader() throws IOException {
+ this(null, -1, null);
+ }
+
+ /**
+ * Opens a HFile. You must load the file info before you can
+ * use it by calling {@link #loadFileInfo()}.
+ *
+ * @param fs filesystem to load from
+ * @param path path within said filesystem
+ * @param cache block cache. Pass null if none.
+ * @throws IOException
+ */
+ public Reader(FileSystem fs, Path path, BlockCache cache)
+ throws IOException {
+ this(fs.open(path), fs.getFileStatus(path).getLen(), cache);
+ this.closeIStream = true;
+ this.name = path.toString();
+ }
+
+ /**
+ * Opens a HFile. You must load the index before you can
+ * use it by calling {@link #loadFileInfo()}.
+ *
+ * @param fsdis input stream. Caller is responsible for closing the passed
+ * stream.
+ * @param size Length of the stream.
+ * @param cache block cache. Pass null if none.
+ * @throws IOException
+ */
+ public Reader(final FSDataInputStream fsdis, final long size,
+ final BlockCache cache)
+ throws IOException {
+ this.cache = cache;
+ this.fileSize = size;
+ this.istream = fsdis;
+ this.closeIStream = false;
+ this.name = this.istream.toString();
+ }
+
+ public String toString() {
+ return "reader=" + this.name +
+ (!isFileInfoLoaded()? "":
+ ", compression=" + this.compressAlgo.getName() +
+ ", firstKey=" + Bytes.toString(getFirstKey()) +
+ ", lastKey=" + Bytes.toString(getLastKey()) +
+ ", avgKeyLen=" + this.avgKeyLen +
+ ", avgValueLen=" + this.avgValueLen +
+ ", entries=" + this.trailer.entryCount +
+ ", length=" + this.fileSize);
+ }
+
+ public long length() {
+ return this.fileSize;
+ }
+
+ /**
+ * Read in the index and file info.
+ * @return A map of fileinfo data.
+ * See {@link Writer#appendFileInfo(byte[], byte[])}.
+ * @throws IOException
+ */
+ public Map<byte [], byte []> loadFileInfo() throws IOException {
+ this.trailer = readTrailer();
+
+ // Read in the fileinfo and get what we need from it.
+ this.istream.seek(this.trailer.fileinfoOffset);
+ FileInfo fi = new FileInfo();
+ fi.readFields(this.istream);
+ this.lastkey = fi.get(FileInfo.LASTKEY);
+ this.avgKeyLen = Bytes.toInt(fi.get(FileInfo.AVG_KEY_LEN));
+ this.avgValueLen = Bytes.toInt(fi.get(FileInfo.AVG_VALUE_LEN));
+ String clazzName = Bytes.toString(fi.get(FileInfo.COMPARATOR));
+ this.comparator = getComparator(clazzName);
+
+ // Read in the data index.
+ this.blockIndex = BlockIndex.readIndex(this.comparator, this.istream,
+ this.trailer.dataIndexOffset, this.trailer.dataIndexCount);
+
+ // Read in the metadata index.
+ if (trailer.metaIndexCount > 0) {
+ this.metaIndex = BlockIndex.readIndex(Bytes.BYTES_RAWCOMPARATOR,
+ this.istream, this.trailer.metaIndexOffset, trailer.metaIndexCount);
+ }
+ this.fileInfoLoaded = true;
+ return fi;
+ }
+
+ boolean isFileInfoLoaded() {
+ return this.fileInfoLoaded;
+ }
+
+ @SuppressWarnings("unchecked")
+ private RawComparator getComparator(final String clazzName)
+ throws IOException {
+ if (clazzName == null || clazzName.length() == 0) {
+ return null;
+ }
+ try {
+ return (RawComparator)Class.forName(clazzName).newInstance();
+ } catch (InstantiationException e) {
+ throw new IOException(e);
+ } catch (IllegalAccessException e) {
+ throw new IOException(e);
+ } catch (ClassNotFoundException e) {
+ throw new IOException(e);
+ }
+ }
+
+ /* Read the trailer off the input stream. As side effect, sets the
+ * compression algorithm.
+ * @return Populated FixedFileTrailer.
+ * @throws IOException
+ */
+ private FixedFileTrailer readTrailer() throws IOException {
+ FixedFileTrailer fft = new FixedFileTrailer();
+ long seekPoint = this.fileSize - FixedFileTrailer.trailerSize();
+ this.istream.seek(seekPoint);
+ fft.deserialize(this.istream);
+ // Set up the codec.
+ this.compressAlgo =
+ Compression.Algorithm.values()[fft.compressionCodec];
+ return fft;
+ }
+
+ /**
+ * Create a Scanner on this file. No seeks or reads are done on creation.
+ * Call {@link HFileScanner#seekTo(byte[])} to position and start the read.
+ * There is nothing to clean up in a Scanner. Letting go of your references
+ * to the scanner is sufficient.
+ * @return Scanner on this file.
+ */
+ public HFileScanner getScanner() {
+ return new Scanner(this);
+ }
+ /**
+ * @param key Key to search.
+ * @return Block number of the block containing the key or -1 if not in this
+ * file.
+ */
+ protected int blockContainingKey(final byte [] key) {
+ if (blockIndex == null) {
+ throw new RuntimeException("Block index not loaded");
+ }
+ return blockIndex.blockContainingKey(key);
+ }
+ /**
+ * @param metaBlockName
+ * @return Block wrapped in a ByteBuffer
+ * @throws IOException
+ */
+ public ByteBuffer getMetaBlock(String metaBlockName) throws IOException {
+ if (metaIndex == null) {
+ throw new IOException("Meta index not loaded");
+ }
+ int block = metaIndex.blockContainingKey(Bytes.toBytes(metaBlockName));
+ if (block == -1)
+ return null;
+ long blockSize;
+ if (block == metaIndex.count - 1) {
+ blockSize = trailer.fileinfoOffset - metaIndex.blockOffsets[block];
+ } else {
+ blockSize = metaIndex.blockOffsets[block+1] - metaIndex.blockOffsets[block];
+ }
+
+ ByteBuffer buf = decompress(metaIndex.blockOffsets[block],
+ longToInt(blockSize), metaIndex.blockDataSizes[block]);
+ byte [] magic = new byte[METABLOCKMAGIC.length];
+ buf.get(magic, 0, magic.length);
+
+ if (! Arrays.equals(magic, METABLOCKMAGIC)) {
+ throw new IOException("Meta magic is bad in block " + block);
+ }
+ // Toss the header. May have to remove later due to performance.
+ buf.compact();
+ buf.limit(buf.limit() - METABLOCKMAGIC.length);
+ buf.rewind();
+ return buf;
+ }
+ /**
+ * Read in a file block.
+ * @param block Index of block to read.
+ * @return Block wrapped in a ByteBuffer.
+ * @throws IOException
+ */
+ ByteBuffer readBlock(int block) throws IOException {
+ if (blockIndex == null) {
+ throw new IOException("Block index not loaded");
+ }
+ if (block < 0 || block >= blockIndex.count) {
+ throw new IOException("Requested block is out of range: " + block +
+ ", max: " + blockIndex.count);
+ }
+
+ // For any given block from any given file, synchronize reads for said
+ // block.
+ // Without a cache, this synchronizing is needless overhead, but really
+ // the other choice is to duplicate work (which the cache would prevent you from doing).
+ synchronized (blockIndex.blockKeys[block]) {
+ blockLoads++;
+ // Check cache for block. If found return.
+ if (cache != null) {
+ ByteBuffer cachedBuf = cache.getBlock(name + block);
+ if (cachedBuf != null) {
+ // LOG.debug("Reusing block for: " + block);
+ // Return a distinct 'copy' of the block, so pos doesn't get messed by
+ // the scanner
+ cacheHits++;
+ return cachedBuf.duplicate();
+ }
+ // Carry on, please load.
+ }
+
+ // Load block from filesystem.
+ long onDiskBlockSize;
+ if (block == blockIndex.count - 1) {
+ // last block! The end of data block is first meta block if there is
+ // one or if there isn't, the fileinfo offset.
+ long offset = this.metaIndex != null?
+ this.metaIndex.blockOffsets[0]: this.trailer.fileinfoOffset;
+ onDiskBlockSize = offset - blockIndex.blockOffsets[block];
+ } else {
+ onDiskBlockSize = blockIndex.blockOffsets[block+1] -
+ blockIndex.blockOffsets[block];
+ }
+ ByteBuffer buf = decompress(blockIndex.blockOffsets[block],
+ longToInt(onDiskBlockSize), this.blockIndex.blockDataSizes[block]);
+
+ byte [] magic = new byte[DATABLOCKMAGIC.length];
+ buf.get(magic, 0, magic.length);
+ // LOG.debug("read block:"+buf.position() + " lim:" + buf.limit());
+ if (!Arrays.equals(magic, DATABLOCKMAGIC)) {
+ throw new IOException("Data magic is bad in block " + block);
+ }
+ // Toss the header. May have to remove later due to performance.
+ buf.compact();
+ buf.limit(buf.limit() - DATABLOCKMAGIC.length);
+ // LOG.debug("read block:"+buf.position() + " lim:" + buf.limit());
+ buf.rewind();
+ // LOG.debug("read block:"+buf.position() + " lim:" + buf.limit());
+
+ // Cache a copy, not the one we are sending back, so the position doesn't
+ // get messed.
+ if (cache != null) {
+ cache.cacheBlock(name + block, buf.duplicate());
+ }
+
+ return buf;
+ }
+ }
+
+ /*
+ * Decompress compressedSize bytes off the backing
+ * FSDataInputStream.
+ * @param offset
+ * @param compressedSize
+ * @param decompressedSize
+ * @return
+ * @throws IOException
+ */
+ private ByteBuffer decompress(final long offset, final int compressedSize,
+ final int decompressedSize)
+ throws IOException {
+ Decompressor decompressor = this.compressAlgo.getDecompressor();
+ // My guess is that the bounded range fis is needed to stop the
+ // decompressor reading into next block -- IIRC, it just grabs a
+ // bunch of data w/o regard to whether decompressor is coming to end of a
+ // decompression.
+ InputStream is = this.compressAlgo.createDecompressionStream(
+ new BoundedRangeFileInputStream(this.istream, offset, compressedSize),
+ decompressor, 0);
+ ByteBuffer buf = ByteBuffer.allocate(decompressedSize);
+ IOUtils.readFully(is, buf.array(), 0, buf.capacity());
+ return buf;
+ }
+
+ /**
+ * @return First key in the file.
+ */
+ public byte [] getFirstKey() {
+ if (blockIndex == null) {
+ throw new RuntimeException("Block index not loaded");
+ }
+ return blockIndex.blockKeys[0];
+ }
+
+ public int getEntries() {
+ if (!this.isFileInfoLoaded()) {
+ throw new RuntimeException("File info not loaded");
+ }
+ return this.trailer.entryCount;
+ }
+
+ /**
+ * @return Last key in the file.
+ */
+ public byte [] getLastKey() {
+ if (!isFileInfoLoaded()) {
+ throw new RuntimeException("Load file info first");
+ }
+ return this.lastkey;
+ }
+
+ /**
+ * @return Comparator.
+ */
+ public RawComparator getComparator() {
+ return this.comparator;
+ }
+
+ /**
+ * @return index size
+ */
+ public long indexSize() {
+ return (this.blockIndex != null? this.blockIndex.heapSize(): 0) +
+ ((this.metaIndex != null)? this.metaIndex.heapSize(): 0);
+ }
+
+ /**
+ * @return Midkey for this file. We work with block boundaries only so
+ * returned midkey is an approximation only.
+ * @throws IOException
+ */
+ public byte [] midkey() throws IOException {
+ if (!isFileInfoLoaded() || this.blockIndex.isEmpty()) {
+ return null;
+ }
+ return this.blockIndex.midkey();
+ }
+
+ public void close() throws IOException {
+ if (this.closeIStream && this.istream != null) {
+ this.istream.close();
+ this.istream = null;
+ }
+ }
+
+ /*
+ * Implementation of {@link HFileScanner} interface.
+ */
+ private static class Scanner implements HFileScanner {
+ private final Reader reader;
+ private ByteBuffer block;
+ private int currBlock;
+
+ private int currKeyLen = 0;
+ private int currValueLen = 0;
+
+ public int blockFetches = 0;
+
+ public Scanner(Reader r) {
+ this.reader = r;
+ }
+
+ public ByteBuffer getKey() {
+ if (this.block == null || this.currKeyLen == 0) {
+ throw new RuntimeException("you need to seekTo() before calling getKey()");
+ }
+ ByteBuffer keyBuff = this.block.slice();
+ keyBuff.limit(this.currKeyLen);
+ keyBuff.rewind();
+ // Do keyBuff.asReadOnly()?
+ return keyBuff;
+ }
+
+ public ByteBuffer getValue() {
+ if (block == null || currKeyLen == 0) {
+ throw new RuntimeException("you need to seekTo() before calling getValue()");
+ }
+ // TODO: Could this be done with one ByteBuffer rather than create two?
+ ByteBuffer valueBuff = this.block.slice();
+ valueBuff.position(this.currKeyLen);
+ valueBuff = valueBuff.slice();
+ valueBuff.limit(currValueLen);
+ valueBuff.rewind();
+ return valueBuff;
+ }
+
+ public boolean next() throws IOException {
+ // LOG.debug("rem:" + block.remaining() + " p:" + block.position() +
+ // " kl: " + currKeyLen + " kv: " + currValueLen);
+ if (block == null) {
+ throw new IOException("Next called on non-seeked scanner");
+ }
+ block.position(block.position() + currKeyLen + currValueLen);
+ if (block.remaining() <= 0) {
+ // LOG.debug("Fetch next block");
+ currBlock++;
+ if (currBlock >= reader.blockIndex.count) {
+ // damn we are at the end
+ currBlock = 0;
+ block = null;
+ return false;
+ }
+ block = reader.readBlock(currBlock);
+ currKeyLen = block.getInt();
+ currValueLen = block.getInt();
+ blockFetches++;
+ return true;
+ }
+ // LOG.debug("rem:" + block.remaining() + " p:" + block.position() +
+ // " kl: " + currKeyLen + " kv: " + currValueLen);
+
+ currKeyLen = block.getInt();
+ currValueLen = block.getInt();
+ return true;
+ }
+
+ public int seekTo(byte[] key) throws IOException {
+ int b = reader.blockContainingKey(key);
+ if (b < 0) return -1; // falls before the beginning of the file! :-(
+ // Avoid re-reading the same block (that'd be dumb).
+ loadBlock(b);
+
+ return blockSeek(key, false);
+ }
+
+ /**
+ * Within a loaded block, seek to the given key if it is present, otherwise
+ * to the last key that sorts before it.
+ *
+ * A note on the seekBefore - if you have seekBefore = true, AND the
+ * first key in the block = key, then you'll get thrown exceptions.
+ * @param key to find
+ * @param seekBefore find the key before the exact match.
+ * @return 0 if an exact match was found, 1 otherwise.
+ */
+ private int blockSeek(byte[] key, boolean seekBefore) {
+ int klen, vlen;
+ int lastLen = 0;
+ do {
+ klen = block.getInt();
+ vlen = block.getInt();
+ int comp = this.reader.comparator.compare(key, 0, key.length,
+ block.array(), block.arrayOffset() + block.position(), klen);
+ if (comp == 0) {
+ if (seekBefore) {
+ block.position(block.position() - lastLen - 16);
+ currKeyLen = block.getInt();
+ currValueLen = block.getInt();
+ return 1; // non exact match.
+ }
+ currKeyLen = klen;
+ currValueLen = vlen;
+ return 0; // indicate exact match
+ }
+ if (comp < 0) {
+ // go back one key:
+ block.position(block.position() - lastLen - 16);
+ currKeyLen = block.getInt();
+ currValueLen = block.getInt();
+ return 1;
+ }
+ block.position(block.position() + klen + vlen);
+ lastLen = klen + vlen ;
+ } while( block.remaining() > 0 );
+ // ok we are at the end, so go back a littleeeeee....
+ block.position(block.position() - lastLen - 8);
+ currKeyLen = block.getInt();
+ currValueLen = block.getInt();
+ return 1; // didn't exactly find it.
+ }
+
+ public boolean seekBefore(byte[] key) throws IOException {
+ int b = reader.blockContainingKey(key);
+ if (b < 0)
+ return false; // key is before the start of the file.
+
+ // Question: does this block begin with 'key'?
+ if (this.reader.comparator.compare(reader.blockIndex.blockKeys[b], key) == 0) {
+ // Ok the key we're interested in is the first of the block, so go back one.
+ if (b == 0) {
+ // we have a 'problem', the key we want is the first of the file.
+ return false;
+ }
+ b--;
+ // TODO shortcut: seek forward in this block to the last key of the block.
+ }
+ loadBlock(b);
+ blockSeek(key, true);
+ return true;
+ }
+
+ public String getKeyString() {
+ return Bytes.toString(block.array(), block.arrayOffset() +
+ block.position(), currKeyLen);
+ }
+
+ public String getValueString() {
+ return Bytes.toString(block.array(), block.arrayOffset() +
+ block.position() + currKeyLen, currValueLen);
+ }
+
+ public Reader getReader() {
+ return this.reader;
+ }
+
+ public boolean isSeeked(){
+ return this.block != null;
+ }
+
+ public boolean seekTo() throws IOException {
+ if (this.reader.blockIndex.isEmpty()) {
+ return false;
+ }
+ if (block != null && currBlock == 0) {
+ block.rewind();
+ currKeyLen = block.getInt();
+ currValueLen = block.getInt();
+ return true;
+ }
+ currBlock = 0;
+ block = reader.readBlock(currBlock);
+ currKeyLen = block.getInt();
+ currValueLen = block.getInt();
+ blockFetches++;
+ return true;
+ }
+
+ private void loadBlock(int bloc) throws IOException {
+ if (block == null) {
+ block = reader.readBlock(bloc);
+ currBlock = bloc;
+ blockFetches++;
+ } else {
+ if (bloc != currBlock) {
+ block = reader.readBlock(bloc);
+ currBlock = bloc;
+ blockFetches++;
+ } else {
+ // we are already in the same block, just rewind to seek again.
+ block.rewind();
+ }
+ }
+ }
+ }
+ }
+ /*
+ * The HFile has a fixed trailer which contains offsets to other variable
+ * parts of the file. Also includes basic metadata on this file.
+ */
+ private static class FixedFileTrailer {
+ // Offset to the data block index.
+ long dataIndexOffset;
+ // Offset to the fileinfo data, a small block of vitals.
+ long fileinfoOffset;
+ // How many index counts are there (aka: block count)
+ int dataIndexCount;
+ // Offset to the meta block index.
+ long metaIndexOffset;
+ // How many meta block index entries (aka: meta block count)
+ int metaIndexCount;
+ long totalUncompressedBytes;
+ int entryCount;
+ int compressionCodec;
+ int version = 1;
+
+ FixedFileTrailer() {
+ super();
+ }
+
+ static int trailerSize() {
+ // Keep this up to date...
+ final int intSize = 4;
+ final int longSize = 8;
+ return
+ ( intSize * 5 ) +
+ ( longSize * 4 ) +
+ TRAILERBLOCKMAGIC.length;
+ }
+
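+ /*
+ * On-disk layout, as written by serialize() and read back by deserialize():
+ * TRAILERBLOCKMAGIC, fileinfoOffset (long), dataIndexOffset (long),
+ * dataIndexCount (int), metaIndexOffset (long), metaIndexCount (int),
+ * totalUncompressedBytes (long), entryCount (int), compressionCodec (int),
+ * version (int). trailerSize() above must account for every field here.
+ */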
+ void serialize(DataOutputStream outputStream) throws IOException {
+ outputStream.write(TRAILERBLOCKMAGIC);
+ outputStream.writeLong(fileinfoOffset);
+ outputStream.writeLong(dataIndexOffset);
+ outputStream.writeInt(dataIndexCount);
+ outputStream.writeLong(metaIndexOffset);
+ outputStream.writeInt(metaIndexCount);
+ outputStream.writeLong(totalUncompressedBytes);
+ outputStream.writeInt(entryCount);
+ outputStream.writeInt(compressionCodec);
+ outputStream.writeInt(version);
+ }
+
+ void deserialize(DataInputStream inputStream) throws IOException {
+ byte [] header = new byte[TRAILERBLOCKMAGIC.length];
+ inputStream.readFully(header);
+ if ( !Arrays.equals(header, TRAILERBLOCKMAGIC)) {
+ throw new IOException("Trailer 'header' is wrong; does the trailer " +
+ "size match content?");
+ }
+ fileinfoOffset = inputStream.readLong();
+ dataIndexOffset = inputStream.readLong();
+ dataIndexCount = inputStream.readInt();
+
+ metaIndexOffset = inputStream.readLong();
+ metaIndexCount = inputStream.readInt();
+
+ totalUncompressedBytes = inputStream.readLong();
+ entryCount = inputStream.readInt();
+ compressionCodec = inputStream.readInt();
+ version = inputStream.readInt();
+
+ if (version != 1) {
+ throw new IOException("Wrong version: " + version);
+ }
+ }
+
+ public String toString() {
+ return "fileinfoOffset=" + fileinfoOffset +
+ ", dataIndexOffset=" + dataIndexOffset +
+ ", dataIndexCount=" + dataIndexCount +
+ ", metaIndexOffset=" + metaIndexOffset +
+ ", metaIndexCount=" + metaIndexCount +
+ ", totalBytes=" + totalUncompressedBytes +
+ ", entryCount=" + entryCount +
+ ", version=" + version;
+ }
+ }
+
+ /*
+ * The block index for an HFile.
+ * Used when reading.
+ */
+ static class BlockIndex implements HeapSize {
+ // How many actual items are there? The next insert location too.
+ int count = 0;
+ byte [][] blockKeys;
+ long [] blockOffsets;
+ int [] blockDataSizes;
+ int size = 0;
+
+ /* Needed when doing lookups on blocks.
+ */
+ RawComparator comparator;
+
+ /*
+ * Shutdown default constructor
+ */
+ @SuppressWarnings("unused")
+ private BlockIndex() {
+ this(null);
+ }
+
+ /**
+ * Constructor
+ * @param c Comparator used to sort and look up block keys.
+ */
+ BlockIndex(final RawComparator c) {
+ this.comparator = c;
+ // Guess that cost of three arrays + this object is 4 * 8 bytes.
+ this.size += (4 * 8);
+ }
+
+ /**
+ * @return True if block index is empty.
+ */
+ boolean isEmpty() {
+ return this.blockKeys.length <= 0;
+ }
+
+ /**
+ * Adds a new entry in the block index.
+ *
+ * @param key First key in the block
+ * @param offset file offset where the block is stored
+ * @param dataSize the uncompressed data size
+ */
+ void add(final byte[] key, final long offset, final int dataSize) {
+ blockOffsets[count] = offset;
+ blockKeys[count] = key;
+ blockDataSizes[count] = dataSize;
+ count++;
+ this.size += (Bytes.SIZEOF_INT * 2 + key.length);
+ }
+
+ /**
+ * @param key Key to find
+ * @return Index of the block that may contain the key, or -1 if the
+ * key falls before the start of the file.
+ */
+ int blockContainingKey(final byte[] key) {
+ int pos = Arrays.binarySearch(blockKeys, key, this.comparator);
+ if (pos < 0) {
+ pos ++;
+ pos *= -1;
+ if (pos == 0) {
+ // falls before the beginning of the file.
+ return -1;
+ }
+ // The index stores the first key of each block, so after converting the
+ // binarySearch result, pos is the first block whose first key is greater
+ // than the one sought. The key, if present, is in the block before that.
+ pos --; // in previous block.
+
+ return pos;
+ }
+ // wow, a perfect hit, how unlikely?
+ return pos;
+ }
+
+ /*
+ * @return File midkey. Inexact. Operates on block boundaries. Does
+ * not go into blocks.
+ */
+ byte [] midkey() throws IOException {
+ int pos = ((this.count - 1)/2); // middle of the index
+ if (pos < 0) {
+ throw new IOException("HFile empty");
+ }
+ return this.blockKeys[pos];
+ }
+
+ /*
+ * Write out index. Whatever we write here must jibe with what
+ * BlockIndex#readIndex is expecting. Make sure the two ends of the
+ * index serialization match.
+ * @param o
+ * @param keys
+ * @param offsets
+ * @param sizes
+ * @return Position at which we entered the index.
+ * @throws IOException
+ */
+ static long writeIndex(final FSDataOutputStream o,
+ final List<byte []> keys, final List<Long> offsets,
+ final List<Integer> sizes)
+ throws IOException {
+ long pos = o.getPos();
+ // Don't write an index if nothing in the index.
+ if (keys.size() > 0) {
+ o.write(INDEXBLOCKMAGIC);
+ // Write the index.
+ for (int i = 0; i < keys.size(); ++i) {
+ o.writeLong(offsets.get(i).longValue());
+ o.writeInt(sizes.get(i).intValue());
+ byte [] key = keys.get(i);
+ Bytes.writeByteArray(o, key);
+ }
+ }
+ return pos;
+ }
+
+ /*
+ * Read in the index that is at indexOffset
+ * Must match what was written by writeIndex in the Writer.close.
+ * @param in
+ * @param indexOffset
+ * @throws IOException
+ */
+ static BlockIndex readIndex(final RawComparator c,
+ final FSDataInputStream in, final long indexOffset, final int indexSize)
+ throws IOException {
+ BlockIndex bi = new BlockIndex(c);
+ bi.blockOffsets = new long[indexSize];
+ bi.blockKeys = new byte[indexSize][];
+ bi.blockDataSizes = new int[indexSize];
+ // If index size is zero, no index was written.
+ if (indexSize > 0) {
+ in.seek(indexOffset);
+ byte [] magic = new byte[INDEXBLOCKMAGIC.length];
+ IOUtils.readFully(in, magic, 0, magic.length);
+ if (!Arrays.equals(magic, INDEXBLOCKMAGIC)) {
+ throw new IOException("Index block magic is wrong: " +
+ Arrays.toString(magic));
+ }
+ for (int i = 0; i < indexSize; ++i ) {
+ long offset = in.readLong();
+ int dataSize = in.readInt();
+ byte [] key = Bytes.readByteArray(in);
+ bi.add(key, offset, dataSize);
+ }
+ }
+ return bi;
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("size=" + count);
+ for (int i = 0; i < count ; i++) {
+ sb.append(", ");
+ sb.append("key=").append(Bytes.toString(blockKeys[i])).
+ append(", offset=").append(blockOffsets[i]).
+ append(", dataSize=" + blockDataSizes[i]);
+ }
+ return sb.toString();
+ }
+
+ public long heapSize() {
+ return this.size;
+ }
+ }
+
+ /*
+ * Metadata for this file. Conjured by the writer. Read in by the reader.
+ */
+ static class FileInfo extends HbaseMapWritable<byte [], byte []> {
+ static final String RESERVED_PREFIX = "hfile.";
+ static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
+ static final byte [] AVG_KEY_LEN =
+ Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
+ static final byte [] AVG_VALUE_LEN =
+ Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
+ static final byte [] COMPARATOR =
+ Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
+
+ /*
+ * Constructor.
+ */
+ FileInfo() {
+ super();
+ }
+ }
+
+ /**
+ * Get names of supported compression algorithms. The names are accepted by
+ * HFile.Writer.
+ *
+ * @return Array of strings, each represents a supported compression
+ * algorithm. Currently, the following compression algorithms are
+ * supported.
+ *
+ * - "none" - No compression.
+ *
- "gz" - GZIP compression.
+ *
+ */
+ public static String[] getSupportedCompressionAlgorithms() {
+ return Compression.getSupportedAlgorithms();
+ }
+
+ // Utility methods.
+ /*
+ * @param l Long to convert to an int.
+ * @return l cast as an int.
+ */
+ static int longToInt(final long l) {
+ // Expecting the size() of a block not exceeding 4GB. Assuming the
+ // size() will wrap to negative integer if it exceeds 2GB (From tfile).
+ return (int)(l & 0x00000000ffffffffL);
+ }
+}
\ No newline at end of file
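The Reader javadoc above requires loadFileInfo() before any other call; the sketch below is a minimal, illustrative wiring of the constructor, loadFileInfo(), getScanner() and close() shown in the diff (the class name and file-path argument are hypothetical, not part of this patch).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;

public class HFileReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Pass null for the block cache; readBlock() treats null as "no cache".
    HFile.Reader reader = new HFile.Reader(fs, new Path(args[0]), null);
    try {
      // Reads the fixed trailer, the file info block and both block indexes.
      reader.loadFileInfo();
      HFileScanner scanner = reader.getScanner();
      // seekTo() positions at the start of the file; false means empty file.
      if (scanner.seekTo()) {
        do {
          System.out.println(scanner.getKeyString() + " -> " +
            scanner.getValueString());
        } while (scanner.next());
      }
      System.out.println("entries=" + reader.getEntries() +
        ", length=" + reader.length());
    } finally {
      reader.close();
    }
  }
}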
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java b/src/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java
new file mode 100644
index 0000000..b16ee76
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java
@@ -0,0 +1,112 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * A scanner allows you to position yourself within an HFile and
+ * scan through it. It also lets you reposition yourself.
+ *
+ * A scanner doesn't always have a key/value that it is pointing to
+ * when it is first created and before
+ * {@link #seekTo()}/{@link #seekTo(byte[])} are called.
+ * In this case, {@link #getKey()}/{@link #getValue()} returns null. At most
+ * other times, a key and value will be available. The general pattern is that
+ * you position the Scanner using the seekTo variants and then getKey and
+ * getValue.
+ */
+public interface HFileScanner {
+ /**
+ * SeekTo or just before the passed key. Examine the return
+ * code to figure whether we found the key or not.
+ * Consider the key stream of all the keys in the file,
+ * k[0] .. k[n], where there are n keys in the file.
+ * @param key Key to find.
+ * @return -1, if key < k[0], no position;
+ * 0, such that k[i] = key and scanner is left in position i; and
+ * 1, such that k[i] < key, and scanner is left in position i.
+ * Furthermore, there may be a k[i+1], such that k[i] < key < k[i+1]
+ * but there may not be a k[i+1], and next() will return false (EOF).
+ * @throws IOException
+ */
+ public int seekTo(byte[] key) throws IOException;
+ /**
+ * Consider the key stream of all the keys in the file,
+ * k[0] .. k[n], where there are n keys in the file.
+ * @param key Key to find
+ * @return false if key <= k[0] or true with scanner in position 'i' such
+ * that: k[i] < key. Furthermore: there may be a k[i+1], such that
+ * k[i] < key <= k[i+1] but there may also NOT be a k[i+1], and next() will
+ * return false (EOF).
+ */
+ public boolean seekBefore(byte [] key) throws IOException;
+ /**
+ * Positions this scanner at the start of the file.
+ * @return False if empty file; i.e. a call to next would return false and
+ * the current key and value are undefined.
+ * @throws IOException
+ */
+ public boolean seekTo() throws IOException;
+ /**
+ * Scans to the next entry in the file.
+ * @return false if you are at the end of the file, otherwise true.
+ * @throws IOException
+ */
+ public boolean next() throws IOException;
+ /**
+ * Gets a buffer view to the current key. You must call
+ * {@link #seekTo(byte[])} before this method.
+ * @return byte buffer for the key. The limit is set to the key size, and the
+ * position is 0, the start of the buffer view.
+ */
+ public ByteBuffer getKey();
+ /**
+ * Gets a buffer view to the current value. You must call
+ * {@link #seekTo(byte[])} before this method.
+ *
+ * @return byte buffer for the value. The limit is set to the value size, and
+ * the position is 0, the start of the buffer view.
+ */
+ public ByteBuffer getValue();
+ /**
+ * Convenience method to get a copy of the key as a string - interpreting the
+ * bytes as UTF8. You must call {@link #seekTo(byte[])} before this method.
+ * @return key as a string
+ */
+ public String getKeyString();
+ /**
+ * Convenience method to get a copy of the value as a string - interpreting
+ * the bytes as UTF8. You must call {@link #seekTo(byte[])} before this method.
+ * @return value as a string
+ */
+ public String getValueString();
+ /**
+ * @return Reader that underlies this Scanner instance.
+ */
+ public HFile.Reader getReader();
+ /**
+ * @return True if scanner has had one of the seek calls invoked; i.e.
+ * {@link #seekBefore(byte[])} or {@link #seekTo()} or {@link #seekTo(byte[])}.
+ * Otherwise returns false.
+ */
+ public boolean isSeeked();
+}
\ No newline at end of file
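The three-way return contract of seekTo(byte[]) above is easy to misread; the helper below is an illustrative sketch (class and method names are hypothetical) of an exact-match lookup against a Reader that has already had loadFileInfo() called.

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;

public class SeekSketch {
  /** @return value for an exact key match, else null. */
  static ByteBuffer valueForExactKey(HFile.Reader reader, byte [] key)
  throws IOException {
    HFileScanner scanner = reader.getScanner();
    int res = scanner.seekTo(key);
    if (res == -1) {
      // key sorts before the first key in the file; scanner is unpositioned.
      return null;
    }
    if (res == 0) {
      // exact match; scanner is positioned on the matching entry.
      return scanner.getValue();
    }
    // res == 1: scanner sits on the last key smaller than the one asked for.
    return null;
  }
}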
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/SimpleBlockCache.java b/src/java/org/apache/hadoop/hbase/io/hfile/SimpleBlockCache.java
new file mode 100644
index 0000000..7f934e1
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/SimpleBlockCache.java
@@ -0,0 +1,56 @@
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.SoftReference;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * Simple one-HFile soft reference cache.
+ */
+public class SimpleBlockCache implements BlockCache {
+ private static class Ref extends SoftReference<ByteBuffer> {
+ public String blockId;
+ public Ref(String blockId, ByteBuffer buf, ReferenceQueue q) {
+ super(buf, q);
+ this.blockId = blockId;
+ }
+ }
+ private Map<String, Ref> cache =
+ new HashMap<String, Ref>();
+
+ private ReferenceQueue q = new ReferenceQueue();
+ public int dumps = 0;
+
+ public SimpleBlockCache() {
+ super();
+ }
+
+ void processQueue() {
+ Ref r;
+ while ( (r = (Ref)q.poll()) != null) {
+ cache.remove(r.blockId);
+ dumps++;
+ }
+ }
+
+ public synchronized int size() {
+ processQueue();
+ return cache.size();
+ }
+ @Override
+ public synchronized ByteBuffer getBlock(String blockName) {
+ processQueue(); // clear out some crap.
+ Ref ref = cache.get(blockName);
+ if (ref == null)
+ return null;
+ return ref.get();
+ }
+
+ @Override
+ public synchronized void cacheBlock(String blockName, ByteBuffer buf) {
+ cache.put(blockName, new Ref(blockName, buf, q));
+ }
+}
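Because entries are only softly referenced, getBlock() may miss even for a key that was cached earlier; the sketch below (class name and block name are illustrative, not part of the patch) shows the null check callers are expected to make, matching how HFile.Reader.readBlock() falls back to the filesystem on a miss.

import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.io.hfile.SimpleBlockCache;
import org.apache.hadoop.hbase.util.Bytes;

public class BlockCacheSketch {
  public static void main(String[] args) {
    SimpleBlockCache cache = new SimpleBlockCache();
    // The reader caches under name + block number, e.g. "somefile" + 0.
    cache.cacheBlock("somefile0", ByteBuffer.wrap(Bytes.toBytes("block bytes")));
    ByteBuffer cached = cache.getBlock("somefile0");
    if (cached != null) {
      System.out.println("hit: " + cached.remaining() + " bytes");
    } else {
      // The soft reference was cleared under memory pressure; re-read block.
      System.out.println("miss");
    }
    System.out.println("blocks cached: " + cache.size());
  }
}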
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/package.html b/src/java/org/apache/hadoop/hbase/io/hfile/package.html
new file mode 100644
index 0000000..fa9244f
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/package.html
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+Provides the hbase data+index+metadata file.
+
+
diff --git a/src/java/org/apache/hadoop/hbase/master/BaseScanner.java b/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
index 52bb7fc..86888d7 100644
--- a/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
+++ b/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
@@ -20,36 +20,34 @@
package org.apache.hadoop.hbase.master;
import java.io.IOException;
-import java.util.concurrent.atomic.AtomicBoolean;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.HashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.ipc.RemoteException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Chore;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerInfo;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.UnknownScannerException;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.RowResult;
-
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.HStoreFile;
-import org.apache.hadoop.hbase.regionserver.HStore;
-import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.regionserver.HLog;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.Store;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.ipc.RemoteException;
/**
@@ -292,19 +290,16 @@ abstract class BaseScanner extends Chore implements HConstants {
if (split == null) {
return result;
}
- Path tabledir = HTableDescriptor.getTableDir(this.master.rootdir,
- split.getTableDesc().getName());
+ Path tabledir = new Path(this.master.rootdir, split.getTableDesc().getNameAsString());
for (HColumnDescriptor family: split.getTableDesc().getFamilies()) {
- Path p = HStoreFile.getMapDir(tabledir, split.getEncodedName(),
+ Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(),
family.getName());
-
// Look for reference files. Call listStatus with an anonymous
// instance of PathFilter.
-
FileStatus [] ps = this.master.fs.listStatus(p,
new PathFilter () {
public boolean accept(Path path) {
- return HStore.isReference(path);
+ return StoreFile.isReference(path);
}
}
);
diff --git a/src/java/org/apache/hadoop/hbase/master/DeleteColumn.java b/src/java/org/apache/hadoop/hbase/master/DeleteColumn.java
index d870abd..75b8cad 100644
--- a/src/java/org/apache/hadoop/hbase/master/DeleteColumn.java
+++ b/src/java/org/apache/hadoop/hbase/master/DeleteColumn.java
@@ -21,10 +21,10 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
-import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.regionserver.Store;
/** Instantiated to remove a column family from a table */
class DeleteColumn extends ColumnOperation {
@@ -40,13 +40,14 @@ class DeleteColumn extends ColumnOperation {
@Override
protected void postProcessMeta(MetaRegion m, HRegionInterface server)
throws IOException {
- Path tabledir = new Path(this.master.rootdir, tableName.toString());
for (HRegionInfo i: unservedRegions) {
i.getTableDesc().removeFamily(columnName);
updateRegionInfo(server, m.getRegionName(), i);
// Delete the directories used by the column
- FSUtils.deleteColumnFamily(this.master.fs, tabledir, i.getEncodedName(),
- this.columnName);
+ Path tabledir =
+ new Path(this.master.rootdir, i.getTableDesc().getNameAsString());
+ this.master.fs.delete(Store.getStoreHomedir(tabledir, i.getEncodedName(),
+ this.columnName), true);
}
}
}
\ No newline at end of file
diff --git a/src/java/org/apache/hadoop/hbase/master/MetaRegion.java b/src/java/org/apache/hadoop/hbase/master/MetaRegion.java
index f1f60a8..66d09d8 100644
--- a/src/java/org/apache/hadoop/hbase/master/MetaRegion.java
+++ b/src/java/org/apache/hadoop/hbase/master/MetaRegion.java
@@ -20,7 +20,6 @@
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.util.Bytes;
@@ -88,8 +87,7 @@ public class MetaRegion implements Comparable {
public int compareTo(MetaRegion other) {
int result = Bytes.compareTo(this.regionName, other.getRegionName());
if(result == 0) {
- result = HStoreKey.compareTwoRowKeys(HRegionInfo.FIRST_META_REGIONINFO,
- this.startKey, other.getStartKey());
+ result = HStoreKey.compareTwoRowKeys(this.startKey, other.getStartKey());
if (result == 0) {
// Might be on different host?
result = this.server.compareTo(other.server);
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HLog.java b/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
index 80edb34..d0a00bd 100644
--- a/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
@@ -655,7 +655,7 @@ public class HLog implements HConstants, Syncable {
}
synchronized (updateLock) {
this.writer.append(new HLogKey(regionName, tableName, HLog.METAROW, logSeqId),
- new HLogEdit(HLog.METACOLUMN, HLogEdit.completeCacheFlush.get(),
+ new HLogEdit(HLog.METACOLUMN, HLogEdit.COMPLETE_CACHE_FLUSH,
System.currentTimeMillis()));
this.numEntries++;
Long seq = this.lastSeqWritten.get(regionName);
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HLogEdit.java b/src/java/org/apache/hadoop/hbase/regionserver/HLogEdit.java
index 19b73db..be4bb51 100644
--- a/src/java/org/apache/hadoop/hbase/regionserver/HLogEdit.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/HLogEdit.java
@@ -20,11 +20,11 @@
package org.apache.hadoop.hbase.regionserver;
import org.apache.hadoop.hbase.io.BatchOperation;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.*;
import java.io.*;
+import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.HConstants;
@@ -38,19 +38,15 @@ import org.apache.hadoop.hbase.HConstants;
public class HLogEdit implements Writable, HConstants {
/** Value stored for a deleted item */
- public static ImmutableBytesWritable deleteBytes = null;
+ public static byte [] DELETED_BYTES = null;
/** Value written to HLog on a complete cache flush */
- public static ImmutableBytesWritable completeCacheFlush = null;
+ public static byte [] COMPLETE_CACHE_FLUSH = null;
static {
try {
- deleteBytes =
- new ImmutableBytesWritable("HBASE::DELETEVAL".getBytes(UTF8_ENCODING));
-
- completeCacheFlush =
- new ImmutableBytesWritable("HBASE::CACHEFLUSH".getBytes(UTF8_ENCODING));
-
+ DELETED_BYTES = "HBASE::DELETEVAL".getBytes(UTF8_ENCODING);
+ COMPLETE_CACHE_FLUSH = "HBASE::CACHEFLUSH".getBytes(UTF8_ENCODING);
} catch (UnsupportedEncodingException e) {
assert(false);
}
@@ -58,12 +54,31 @@ public class HLogEdit implements Writable, HConstants {
/**
* @param value
- * @return True if an entry and its content is {@link #deleteBytes}.
+ * @return True if an entry and its content is {@link #DELETED_BYTES}.
*/
public static boolean isDeleted(final byte [] value) {
- return (value == null)? false: deleteBytes.compareTo(value) == 0;
+ return (value == null)? false: isDeleted(value, 0, value.length);
}
-
+
+ /**
+ * @param value
+ * @return True if an entry and its content is {@link #DELETED_BYTES}.
+ */
+ public static boolean isDeleted(final ByteBuffer value) {
+ return isDeleted(value.array(), value.arrayOffset(), value.limit());
+ }
+
+ /**
+ * @param value
+ * @return True if an entry and its content is {@link #DELETED_BYTES}.
+ */
+ public static boolean isDeleted(final byte [] value, final int offset,
+ final int length) {
+ return (value == null)? false:
+ Bytes.BYTES_RAWCOMPARATOR.compare(DELETED_BYTES, 0, DELETED_BYTES.length,
+ value, offset, length) == 0;
+ }
+
/** If transactional log entry, these are the op codes */
public enum TransactionalOperation {
/** start transaction */
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 01b786f..41247e5 100644
--- a/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -26,7 +26,6 @@ import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Random;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
@@ -58,8 +57,8 @@ import org.apache.hadoop.hbase.io.BatchOperation;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
-import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.RowResult;
+import org.apache.hadoop.hbase.io.Reference.Range;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
@@ -105,10 +104,9 @@ import org.apache.hadoop.util.StringUtils;
* defines the keyspace for this HRegion.
*/
public class HRegion implements HConstants {
+ static final Log LOG = LogFactory.getLog(HRegion.class);
static final String SPLITDIR = "splits";
static final String MERGEDIR = "merges";
- static final Random rand = new Random();
- static final Log LOG = LogFactory.getLog(HRegion.class);
final AtomicBoolean closed = new AtomicBoolean(false);
/* Closing can take some time; use the closing flag if there is stuff we don't want
* to do while in closing state; e.g. like offer this region up to the master as a region
@@ -125,11 +123,11 @@ public class HRegion implements HConstants {
new ConcurrentHashMap();
private final Map> targetColumns =
new ConcurrentHashMap>();
- // Default access because read by tests.
- protected final Map stores =
- new ConcurrentHashMap();
+ protected final Map stores =
+ new ConcurrentHashMap();
final AtomicLong memcacheSize = new AtomicLong(0);
+ // This is the table subdirectory.
final Path basedir;
final HLog log;
final FileSystem fs;
@@ -137,7 +135,7 @@ public class HRegion implements HConstants {
final HRegionInfo regionInfo;
final Path regiondir;
private final Path regionCompactionDir;
-
+
/*
* Set this when scheduling compaction if want the next compaction to be a
* major compaction. Cleared each time through compaction code.
@@ -158,7 +156,7 @@ public class HRegion implements HConstants {
// Gets set in close. If set, cannot compact or flush again.
volatile boolean writesEnabled = true;
// Set if region is read-only
- private volatile boolean readOnly = false;
+ volatile boolean readOnly = false;
/**
* Set flags that make this region read-only.
@@ -233,34 +231,33 @@ public class HRegion implements HConstants {
String encodedNameStr = Integer.toString(this.regionInfo.getEncodedName());
this.regiondir = new Path(basedir, encodedNameStr);
this.historian = RegionHistorian.getInstance();
-
if (LOG.isDebugEnabled()) {
+ // Write out region name as string and its encoded name.
LOG.debug("Opening region " + this + "/" +
this.regionInfo.getEncodedName());
}
-
this.regionCompactionDir =
new Path(getCompactionDir(basedir), encodedNameStr);
-
int flushSize = regionInfo.getTableDesc().getMemcacheFlushSize();
if (flushSize == HTableDescriptor.DEFAULT_MEMCACHE_FLUSH_SIZE) {
flushSize = conf.getInt("hbase.hregion.memcache.flush.size",
HTableDescriptor.DEFAULT_MEMCACHE_FLUSH_SIZE);
}
this.memcacheFlushSize = flushSize;
-
this.blockingMemcacheSize = this.memcacheFlushSize *
conf.getInt("hbase.hregion.memcache.block.multiplier", 1);
}
-
- /** Initialize this region and get it ready to roll.
+
+ /**
+ * Initialize this region and get it ready to roll.
+ * Called after construction.
*
* @param initialFiles
* @param reporter
* @throws IOException
*/
- public void initialize( Path initialFiles,
- final Progressable reporter) throws IOException {
+ public void initialize( Path initialFiles, final Progressable reporter)
+ throws IOException {
Path oldLogFile = new Path(regiondir, HREGION_OLDLOGFILE_NAME);
// Move prefab HStore files into place (if any). This picks up split files
@@ -273,19 +270,19 @@ public class HRegion implements HConstants {
long maxSeqId = -1;
long minSeqId = Integer.MAX_VALUE;
for (HColumnDescriptor c : this.regionInfo.getTableDesc().getFamilies()) {
- HStore store = instantiateHStore(this.basedir, c, oldLogFile, reporter);
- stores.put(Bytes.mapKey(c.getName()), store);
+ Store store = instantiateHStore(this.basedir, c, oldLogFile, reporter);
+ this.stores.put(Bytes.mapKey(c.getName()), store);
long storeSeqId = store.getMaxSequenceId();
if (storeSeqId > maxSeqId) {
maxSeqId = storeSeqId;
- }
+ }
if (storeSeqId < minSeqId) {
minSeqId = storeSeqId;
}
}
-
+
+ // Play log if one. Delete when done.
doReconstructionLog(oldLogFile, minSeqId, maxSeqId, reporter);
-
if (fs.exists(oldLogFile)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Deleting old log file: " + oldLogFile);
@@ -302,14 +299,9 @@ public class HRegion implements HConstants {
}
// Get rid of any splits or merges that were lost in-progress
- Path splits = new Path(regiondir, SPLITDIR);
- if (fs.exists(splits)) {
- fs.delete(splits, true);
- }
- Path merges = new Path(regiondir, MERGEDIR);
- if (fs.exists(merges)) {
- fs.delete(merges, true);
- }
+ FSUtils.deleteDirectory(this.fs, new Path(regiondir, SPLITDIR));
+ FSUtils.deleteDirectory(this.fs, new Path(regiondir, MERGEDIR));
+
// See if region is meant to run read-only.
if (this.regionInfo.getTableDesc().isReadOnly()) {
this.writestate.setReadOnly(true);
@@ -346,7 +338,7 @@ public class HRegion implements HConstants {
public boolean isClosing() {
return this.closing.get();
}
-
+
/**
* Close down this HRegion. Flush the cache, shut down each HStore, don't
* service any more calls.
@@ -360,10 +352,10 @@ public class HRegion implements HConstants {
*
* @throws IOException
*/
- public List close() throws IOException {
+ public List close() throws IOException {
return close(false);
}
-
+
/**
* Close down this HRegion. Flush the cache unless abort parameter is true,
* Shut down each HStore, don't service any more calls.
@@ -378,7 +370,7 @@ public class HRegion implements HConstants {
*
* @throws IOException
*/
- List close(boolean abort) throws IOException {
+ List close(final boolean abort) throws IOException {
if (isClosed()) {
LOG.warn("region " + this + " already closed");
return null;
@@ -433,8 +425,8 @@ public class HRegion implements HConstants {
internalFlushcache();
}
- List result = new ArrayList();
- for (HStore store: stores.values()) {
+ List result = new ArrayList();
+ for (Store store: stores.values()) {
result.addAll(store.close());
}
this.closed.set(true);
@@ -513,7 +505,7 @@ public class HRegion implements HConstants {
/** @return returns size of largest HStore. */
public long getLargestHStoreSize() {
long size = 0;
- for (HStore h: stores.values()) {
+ for (Store h: stores.values()) {
long storeSize = h.getSize();
if (storeSize > size) {
size = storeSize;
@@ -521,17 +513,17 @@ public class HRegion implements HConstants {
}
return size;
}
-
+
/*
* Split the HRegion to create two brand-new ones. This also closes
* current HRegion. Split should be fast since we don't rewrite store files
* but instead create new 'reference' store files that read off the top and
* bottom ranges of parent store files.
- * @param midKey key on which to split region
+ * @param splitRow row on which to split region
* @return two brand-new (and open) HRegions or null if a split is not needed
* @throws IOException
*/
- HRegion[] splitRegion(final byte [] midKey) throws IOException {
+ HRegion[] splitRegion(final byte [] splitRow) throws IOException {
synchronized (splitLock) {
if (closed.get()) {
return null;
@@ -539,11 +531,11 @@ public class HRegion implements HConstants {
// Add start/end key checking: hbase-428.
byte [] startKey = this.regionInfo.getStartKey();
byte [] endKey = this.regionInfo.getEndKey();
- if (HStoreKey.equalsTwoRowKeys(this.regionInfo,startKey, midKey)) {
+ if (HStoreKey.equalsTwoRowKeys(startKey, splitRow)) {
LOG.debug("Startkey and midkey are same, not splitting");
return null;
}
- if (HStoreKey.equalsTwoRowKeys(this.regionInfo,midKey, endKey)) {
+ if (HStoreKey.equalsTwoRowKeys(splitRow, endKey)) {
LOG.debug("Endkey and midkey are same, not splitting");
return null;
}
@@ -561,14 +553,14 @@ public class HRegion implements HConstants {
rid = this.regionInfo.getRegionId() + 1;
}
HRegionInfo regionAInfo = new HRegionInfo(this.regionInfo.getTableDesc(),
- startKey, midKey, false, rid);
+ startKey, splitRow, false, rid);
Path dirA =
new Path(splits, Integer.toString(regionAInfo.getEncodedName()));
if(fs.exists(dirA)) {
throw new IOException("Cannot split; target file collision at " + dirA);
}
HRegionInfo regionBInfo = new HRegionInfo(this.regionInfo.getTableDesc(),
- midKey, endKey, false, rid);
+ splitRow, endKey, false, rid);
Path dirB =
new Path(splits, Integer.toString(regionBInfo.getEncodedName()));
if(this.fs.exists(dirB)) {
@@ -578,38 +570,31 @@ public class HRegion implements HConstants {
// Now close the HRegion. Close returns all store files or null if not
// supposed to close (? What to do in this case? Implement abort of close?)
// Close also does wait on outstanding rows and calls a flush just-in-case.
- List hstoreFilesToSplit = close(false);
+ List hstoreFilesToSplit = close(false);
if (hstoreFilesToSplit == null) {
LOG.warn("Close came back null (Implement abort of close?)");
throw new RuntimeException("close returned empty vector of HStoreFiles");
}
// Split each store file.
- for(HStoreFile h: hstoreFilesToSplit) {
- // A reference to the bottom half of the hsf store file.
- Reference aReference = new Reference(
- this.regionInfo.getEncodedName(), h.getFileId(),
- new HStoreKey(midKey, this.regionInfo), Reference.Range.bottom);
- HStoreFile a = new HStoreFile(this.conf, fs, splits,
- regionAInfo, h.getColFamily(), -1, aReference);
- // Reference to top half of the hsf store file.
- Reference bReference = new Reference(
- this.regionInfo.getEncodedName(), h.getFileId(),
- new HStoreKey(midKey, this.regionInfo), Reference.Range.top);
- HStoreFile b = new HStoreFile(this.conf, fs, splits,
- regionBInfo, h.getColFamily(), -1, bReference);
- h.splitStoreFile(a, b, this.fs);
+ for(StoreFile h: hstoreFilesToSplit) {
+ StoreFile.split(fs,
+ Store.getStoreHomedir(splits, regionAInfo.getEncodedName(),
+ h.getFamily()),
+ h, splitRow, Range.bottom);
+ StoreFile.split(fs,
+ Store.getStoreHomedir(splits, regionBInfo.getEncodedName(),
+ h.getFamily()),
+ h, splitRow, Range.top);
}
// Done!
// Opening the region copies the splits files from the splits directory
// under each region.
- HRegion regionA =
- new HRegion(basedir, log, fs, conf, regionAInfo, null);
+ HRegion regionA = new HRegion(basedir, log, fs, conf, regionAInfo, null);
regionA.initialize(dirA, null);
regionA.close();
- HRegion regionB =
- new HRegion(basedir, log, fs, conf, regionBInfo, null);
+ HRegion regionB = new HRegion(basedir, log, fs, conf, regionBInfo, null);
regionB.initialize(dirB, null);
regionB.close();
@@ -619,10 +604,8 @@ public class HRegion implements HConstants {
LOG.debug("Cleaned up " + FSUtils.getPath(splits) + " " + deleted);
}
HRegion regions[] = new HRegion [] {regionA, regionB};
-
this.historian.addRegionSplit(this.regionInfo,
regionA.getRegionInfo(), regionB.getRegionInfo());
-
return regions;
}
}
@@ -649,15 +632,13 @@ public class HRegion implements HConstants {
* @throws IOException
*/
private void doRegionCompactionCleanup() throws IOException {
- if (this.fs.exists(this.regionCompactionDir)) {
- this.fs.delete(this.regionCompactionDir, true);
- }
+ FSUtils.deleteDirectory(this.fs, this.regionCompactionDir);
}
void setForceMajorCompaction(final boolean b) {
this.forceMajorCompaction = b;
}
-
+
boolean getForceMajorCompaction() {
return this.forceMajorCompaction;
}
@@ -694,16 +675,16 @@ public class HRegion implements HConstants {
* server does them sequentially and not in parallel.
*
* @param majorCompaction True to force a major compaction regardless of thresholds
- * @return mid key if split is needed
+ * @return split row if split is needed
* @throws IOException
*/
byte [] compactStores(final boolean majorCompaction)
throws IOException {
splitsAndClosesLock.readLock().lock();
try {
- byte [] midKey = null;
+ byte [] splitRow = null;
if (this.closed.get()) {
- return midKey;
+ return splitRow;
}
try {
synchronized (writestate) {
@@ -713,7 +694,7 @@ public class HRegion implements HConstants {
LOG.info("NOT compacting region " + this +
": compacting=" + writestate.compacting + ", writesEnabled=" +
writestate.writesEnabled);
- return midKey;
+ return splitRow;
}
}
LOG.info("starting " + (majorCompaction? "major" : "") +
@@ -721,11 +702,11 @@ public class HRegion implements HConstants {
long startTime = System.currentTimeMillis();
doRegionCompactionPrep();
long maxSize = -1;
- for (HStore store: stores.values()) {
- final HStore.StoreSize size = store.compact(majorCompaction);
+ for (Store store: stores.values()) {
+ final Store.StoreSize size = store.compact(majorCompaction);
if (size != null && size.getSize() > maxSize) {
maxSize = size.getSize();
- midKey = size.getKey();
+ splitRow = size.getSplitRow();
}
}
doRegionCompactionCleanup();
@@ -739,7 +720,7 @@ public class HRegion implements HConstants {
writestate.notifyAll();
}
}
- return midKey;
+ return splitRow;
} finally {
splitsAndClosesLock.readLock().unlock();
}
@@ -859,7 +840,7 @@ public class HRegion implements HConstants {
// Get current size of memcaches.
final long currentMemcacheSize = this.memcacheSize.get();
try {
- for (HStore s: stores.values()) {
+ for (Store s: stores.values()) {
s.snapshot();
}
sequenceId = log.startCacheFlush();
@@ -877,7 +858,7 @@ public class HRegion implements HConstants {
// A. Flush memcache to all the HStores.
// Keep running vector of all store files that includes both old and the
// just-made new flush store file.
- for (HStore hstore: stores.values()) {
+ for (Store hstore: stores.values()) {
boolean needsCompaction = hstore.flushCache(completeSequenceId);
if (needsCompaction) {
compactionRequested = true;
@@ -971,7 +952,7 @@ public class HRegion implements HConstants {
checkRow(row);
checkColumn(column);
// Don't need a row lock for a simple get
- HStoreKey key = new HStoreKey(row, column, timestamp, this.regionInfo);
+ HStoreKey key = new HStoreKey(row, column, timestamp);
Cell[] result = getStore(column).get(key, numVersions);
// Guarantee that we return null instead of a zero-length array,
// if there are no results to return.
@@ -1009,16 +990,16 @@ public class HRegion implements HConstants {
checkColumn(column);
}
}
- HStoreKey key = new HStoreKey(row, ts, this.regionInfo);
+ HStoreKey key = new HStoreKey(row, ts);
Integer lid = getLock(lockid,row);
- HashSet storeSet = new HashSet();
+ HashSet storeSet = new HashSet();
try {
HbaseMapWritable result =
new HbaseMapWritable();
// Get the concerned columns or all of them
if (columns != null) {
for (byte[] bs : columns) {
- HStore store = stores.get(Bytes.mapKey(HStoreKey.getFamily(bs)));
+ Store store = stores.get(Bytes.mapKey(HStoreKey.getFamily(bs)));
if (store != null) {
storeSet.add(store);
}
@@ -1033,14 +1014,14 @@ public class HRegion implements HConstants {
if (columns != null) {
for (byte[] bs : columns) {
if (HStoreKey.getFamilyDelimiterIndex(bs) == (bs.length - 1)) {
- HStore store = stores.get(Bytes.mapKey(HStoreKey.getFamily(bs)));
+ Store store = stores.get(Bytes.mapKey(HStoreKey.getFamily(bs)));
store.getFull(key, null, numVersions, result);
storeSet.remove(store);
}
}
}
- for (HStore targetStore: storeSet) {
+ for (Store targetStore: storeSet) {
targetStore.getFull(key, columns, numVersions, result);
}
@@ -1083,17 +1064,17 @@ public class HRegion implements HConstants {
checkRow(row);
splitsAndClosesLock.readLock().lock();
try {
- HStore store = getStore(columnFamily);
+ Store store = getStore(columnFamily);
// get the closest key. (HStore.getRowKeyAtOrBefore can return null)
byte [] closestKey = store.getRowKeyAtOrBefore(row);
// If it happens to be an exact match, we can stop.
// Otherwise, we need to check if it's the max and move to the next
if (closestKey != null) {
- if (HStoreKey.equalsTwoRowKeys(regionInfo, row, closestKey)) {
- key = new HStoreKey(closestKey, this.regionInfo);
+ if (HStoreKey.equalsTwoRowKeys(row, closestKey)) {
+ key = new HStoreKey(closestKey);
}
if (key == null) {
- key = new HStoreKey(closestKey, this.regionInfo);
+ key = new HStoreKey(closestKey);
}
}
if (key == null) {
@@ -1124,16 +1105,16 @@ public class HRegion implements HConstants {
private Set getKeys(final HStoreKey origin, final int versions)
throws IOException {
Set keys = new TreeSet();
- Collection storesToCheck = null;
+ Collection storesToCheck = null;
if (origin.getColumn() == null || origin.getColumn().length == 0) {
// All families
storesToCheck = this.stores.values();
} else {
- storesToCheck = new ArrayList(1);
+ storesToCheck = new ArrayList(1);
storesToCheck.add(getStore(origin.getColumn()));
}
long now = System.currentTimeMillis();
- for (HStore targetStore: storesToCheck) {
+ for (Store targetStore: storesToCheck) {
if (targetStore != null) {
// Pass versions without modification since in the store getKeys, it
// includes the size of the passed keys array when counting.
@@ -1170,15 +1151,15 @@ public class HRegion implements HConstants {
if (this.closed.get()) {
throw new IOException("Region " + this + " closed");
}
- HashSet storeSet = new HashSet();
+ HashSet storeSet = new HashSet();
for (int i = 0; i < cols.length; i++) {
- HStore s = stores.get(Bytes.mapKey(HStoreKey.getFamily(cols[i])));
+ Store s = stores.get(Bytes.mapKey(HStoreKey.getFamily(cols[i])));
if (s != null) {
storeSet.add(s);
}
}
return new HScanner(cols, firstRow, timestamp,
- storeSet.toArray(new HStore [storeSet.size()]), filter);
+ storeSet.toArray(new Store [storeSet.size()]), filter);
} finally {
newScannerLock.readLock().unlock();
}
@@ -1246,8 +1227,7 @@ public class HRegion implements HConstants {
try {
List<byte []> deletes = null;
for (BatchOperation op: b) {
- HStoreKey key = new HStoreKey(row, op.getColumn(), commitTime,
- this.regionInfo);
+ HStoreKey key = new HStoreKey(row, op.getColumn(), commitTime);
byte[] val = null;
if (op.isPut()) {
val = op.getValue();
@@ -1262,7 +1242,7 @@ public class HRegion implements HConstants {
}
deletes.add(op.getColumn());
} else {
- val = HLogEdit.deleteBytes.get();
+ val = HLogEdit.DELETED_BYTES;
}
}
if (val != null) {
@@ -1339,8 +1319,7 @@ public class HRegion implements HConstants {
System.currentTimeMillis(): b.getTimestamp();
List<byte []> deletes = null;
for (BatchOperation op: b) {
- HStoreKey key = new HStoreKey(row, op.getColumn(), commitTime,
- this.regionInfo);
+ HStoreKey key = new HStoreKey(row, op.getColumn(), commitTime);
byte[] val = null;
if (op.isPut()) {
val = op.getValue();
@@ -1355,7 +1334,7 @@ public class HRegion implements HConstants {
}
deletes.add(op.getColumn());
} else {
- val = HLogEdit.deleteBytes.get();
+ val = HLogEdit.DELETED_BYTES;
}
}
if (val != null) {
@@ -1460,14 +1439,14 @@ public class HRegion implements HConstants {
Integer lid = getLock(lockid, row);
long now = System.currentTimeMillis();
try {
- for (HStore store : stores.values()) {
+ for (Store store : stores.values()) {
List<HStoreKey> keys =
- store.getKeys(new HStoreKey(row, ts, this.regionInfo),
+ store.getKeys(new HStoreKey(row, ts),
ALL_VERSIONS, now, null);
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
- new HStoreKey.HStoreKeyWritableComparator(regionInfo));
+ new HStoreKey.HStoreKeyWritableComparator());
for (HStoreKey key: keys) {
- edits.put(key, HLogEdit.deleteBytes.get());
+ edits.put(key, HLogEdit.DELETED_BYTES);
}
update(edits);
}
@@ -1494,14 +1473,14 @@ public class HRegion implements HConstants {
Integer lid = getLock(lockid, row);
long now = System.currentTimeMillis();
try {
- for (HStore store : stores.values()) {
+ for (Store store : stores.values()) {
List<HStoreKey> keys =
- store.getKeys(new HStoreKey(row, timestamp, this.regionInfo),
+ store.getKeys(new HStoreKey(row, timestamp),
ALL_VERSIONS, now, columnPattern);
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
- new HStoreKey.HStoreKeyWritableComparator(regionInfo));
+ new HStoreKey.HStoreKeyWritableComparator());
for (HStoreKey key: keys) {
- edits.put(key, HLogEdit.deleteBytes.get());
+ edits.put(key, HLogEdit.DELETED_BYTES);
}
update(edits);
}
@@ -1529,15 +1508,15 @@ public class HRegion implements HConstants {
long now = System.currentTimeMillis();
try {
// find the HStore for the column family
- HStore store = getStore(family);
+ Store store = getStore(family);
// find all the keys that match our criteria
- List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp,
- this.regionInfo), ALL_VERSIONS, now, null);
+ List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp),
+ ALL_VERSIONS, now, null);
// delete all the cells
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
- new HStoreKey.HStoreKeyWritableComparator(regionInfo));
+ new HStoreKey.HStoreKeyWritableComparator());
for (HStoreKey key: keys) {
- edits.put(key, HLogEdit.deleteBytes.get());
+ edits.put(key, HLogEdit.DELETED_BYTES);
}
update(edits);
} finally {
@@ -1565,18 +1544,18 @@ public class HRegion implements HConstants {
Integer lid = getLock(lockid, row);
long now = System.currentTimeMillis();
try {
- for(HStore store : stores.values()) {
+ for(Store store: stores.values()) {
String familyName = Bytes.toString(store.getFamily().getName());
// check the family name match the family pattern.
if(!(familyPattern.matcher(familyName).matches()))
continue;
- List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp,
- this.regionInfo), ALL_VERSIONS, now, null);
+ List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp),
+ ALL_VERSIONS, now, null);
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
- new HStoreKey.HStoreKeyWritableComparator(regionInfo));
+ new HStoreKey.HStoreKeyWritableComparator());
for (HStoreKey key: keys) {
- edits.put(key, HLogEdit.deleteBytes.get());
+ edits.put(key, HLogEdit.DELETED_BYTES);
}
update(edits);
}
@@ -1601,13 +1580,13 @@ public class HRegion implements HConstants {
final long ts, final int versions)
throws IOException {
checkReadOnly();
- HStoreKey origin = new HStoreKey(row, column, ts, this.regionInfo);
+ HStoreKey origin = new HStoreKey(row, column, ts);
Set<HStoreKey> keys = getKeys(origin, versions);
if (keys.size() > 0) {
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
- new HStoreKey.HStoreKeyWritableComparator(regionInfo));
+ new HStoreKey.HStoreKeyWritableComparator());
for (HStoreKey key: keys) {
- edits.put(key, HLogEdit.deleteBytes.get());
+ edits.put(key, HLogEdit.DELETED_BYTES);
}
update(edits);
}
@@ -1672,7 +1651,7 @@ public class HRegion implements HConstants {
TreeMap<HStoreKey, byte []> targets = this.targetColumns.get(lockid);
if (targets == null) {
targets = new TreeMap<HStoreKey, byte []>(
- new HStoreKey.HStoreKeyWritableComparator(regionInfo));
+ new HStoreKey.HStoreKeyWritableComparator());
this.targetColumns.put(lockid, targets);
}
targets.put(key, val);
@@ -1759,10 +1738,10 @@ public class HRegion implements HConstants {
// Nothing to do (Replaying is done in HStores)
}
- protected HStore instantiateHStore(Path baseDir,
+ protected Store instantiateHStore(Path baseDir,
HColumnDescriptor c, Path oldLogFile, Progressable reporter)
throws IOException {
- return new HStore(baseDir, this.regionInfo, c, this.fs, oldLogFile,
+ return new Store(baseDir, this.regionInfo, c, this.fs, oldLogFile,
this.conf, reporter);
}
@@ -1773,7 +1752,7 @@ public class HRegion implements HConstants {
* @return Store that goes with the family on passed column.
* TODO: Make this lookup faster.
*/
- public HStore getStore(final byte [] column) {
+ public Store getStore(final byte [] column) {
return this.stores.get(HStoreKey.getFamilyMapKey(column));
}
@@ -1962,7 +1941,7 @@ public class HRegion implements HConstants {
/** Create an HScanner with a handle on many HStores. */
@SuppressWarnings("unchecked")
- HScanner(byte [][] cols, byte [] firstRow, long timestamp, HStore[] stores,
+ HScanner(byte [][] cols, byte [] firstRow, long timestamp, Store [] stores,
RowFilterInterface filter)
throws IOException {
this.filter = filter;
@@ -2004,7 +1983,7 @@ public class HRegion implements HConstants {
this.resultSets = new TreeMap[scanners.length];
this.keys = new HStoreKey[scanners.length];
for (int i = 0; i < scanners.length; i++) {
- keys[i] = new HStoreKey(HConstants.EMPTY_BYTE_ARRAY,regionInfo);
+ keys[i] = new HStoreKey(HConstants.EMPTY_BYTE_ARRAY);
resultSets[i] = new TreeMap<byte [], Cell>(Bytes.BYTES_COMPARATOR);
if(scanners[i] != null && !scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
@@ -2016,7 +1995,6 @@ public class HRegion implements HConstants {
activeScannerCount.incrementAndGet();
}
- @SuppressWarnings("null")
public boolean next(HStoreKey key, SortedMap<byte [], Cell> results)
throws IOException {
boolean moreToFollow = false;
@@ -2029,10 +2007,8 @@ public class HRegion implements HConstants {
for (int i = 0; i < this.keys.length; i++) {
if (scanners[i] != null &&
(chosenRow == null ||
- (HStoreKey.compareTwoRowKeys(regionInfo,
- keys[i].getRow(), chosenRow) < 0) ||
- ((HStoreKey.compareTwoRowKeys(regionInfo, keys[i].getRow(),
- chosenRow) == 0) &&
+ (HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) < 0) ||
+ ((HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) == 0) &&
(keys[i].getTimestamp() > chosenTimestamp)))) {
chosenRow = keys[i].getRow();
chosenTimestamp = keys[i].getTimestamp();
@@ -2049,7 +2025,7 @@ public class HRegion implements HConstants {
for (int i = 0; i < scanners.length; i++) {
if (scanners[i] != null &&
- HStoreKey.compareTwoRowKeys(regionInfo,keys[i].getRow(), chosenRow) == 0) {
+ HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) == 0) {
// NOTE: We used to do results.putAll(resultSets[i]);
// but this had the effect of overwriting newer
// values with older ones. So now we only insert
@@ -2071,7 +2047,7 @@ public class HRegion implements HConstants {
// If the current scanner is non-null AND has a lower-or-equal
// row label, then its timestamp is bad. We need to advance it.
while ((scanners[i] != null) &&
- (HStoreKey.compareTwoRowKeys(regionInfo,keys[i].getRow(), chosenRow) <= 0)) {
+ (HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) <= 0)) {
resultSets[i].clear();
if (!scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
@@ -2193,8 +2169,8 @@ public class HRegion implements HConstants {
RegionHistorian.getInstance().addRegionCreation(info);
}
HRegion region = new HRegion(tableDir,
- new HLog(fs, new Path(regionDir, HREGION_LOGDIR_NAME), conf, null),
- fs, conf, info, null);
+ new HLog(fs, new Path(regionDir, HREGION_LOGDIR_NAME), conf, null),
+ fs, conf, info, null);
region.initialize(null, null);
return region;
}
@@ -2250,9 +2226,9 @@ public class HRegion implements HConstants {
Integer lid = meta.obtainRowLock(row);
try {
HStoreKey key = new HStoreKey(row, COL_REGIONINFO,
- System.currentTimeMillis(), r.getRegionInfo());
+ System.currentTimeMillis());
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
- new HStoreKey.HStoreKeyWritableComparator(meta.getRegionInfo()));
+ new HStoreKey.HStoreKeyWritableComparator());
edits.put(key, Writables.getBytes(r.getRegionInfo()));
meta.update(edits);
} finally {
@@ -2336,7 +2312,9 @@ public class HRegion implements HConstants {
if (LOG.isDebugEnabled()) {
LOG.debug("DELETING region " + regiondir.toString());
}
- fs.delete(regiondir, true);
+ if (!fs.delete(regiondir, true)) {
+ LOG.warn("Failed delete of " + regiondir);
+ }
}
/**
@@ -2373,28 +2351,29 @@ public class HRegion implements HConstants {
*/
public static boolean rowIsInRange(HRegionInfo info, final byte [] row) {
return ((info.getStartKey().length == 0) ||
- (HStoreKey.compareTwoRowKeys(info,info.getStartKey(), row) <= 0)) &&
+ (HStoreKey.compareTwoRowKeys(info.getStartKey(), row) <= 0)) &&
((info.getEndKey().length == 0) ||
- (HStoreKey.compareTwoRowKeys(info,info.getEndKey(), row) > 0));
+ (HStoreKey.compareTwoRowKeys(info.getEndKey(), row) > 0));
}
/**
* Make the directories for a specific column family
*
* @param fs the file system
- * @param basedir base directory where region will live (usually the table dir)
- * @param encodedRegionName encoded region name
+ * @param tabledir base directory where region will live (usually the table dir)
+ * @param hri
* @param colFamily the column family
- * @param tabledesc table descriptor of table
* @throws IOException
*/
- public static void makeColumnFamilyDirs(FileSystem fs, Path basedir,
- int encodedRegionName, byte [] colFamily, HTableDescriptor tabledesc)
+ public static void makeColumnFamilyDirs(FileSystem fs, Path tabledir,
+ final HRegionInfo hri, byte [] colFamily)
throws IOException {
- fs.mkdirs(HStoreFile.getMapDir(basedir, encodedRegionName, colFamily));
- fs.mkdirs(HStoreFile.getInfoDir(basedir, encodedRegionName, colFamily));
+ Path dir = Store.getStoreHomedir(tabledir, hri.getEncodedName(), colFamily);
+ if (!fs.mkdirs(dir)) {
+ LOG.warn("Failed to create " + dir);
+ }
}
-
+
/**
* Merge two HRegions. The regions must be adjacent and must not overlap.
*
@@ -2416,15 +2395,13 @@ public class HRegion implements HConstants {
throw new IOException("Cannot merge two regions with null start key");
}
// A's start key is null but B's isn't. Assume A comes before B
- } else if ((srcB.getStartKey() == null) // A is not null but B is
- || (HStoreKey.compareTwoRowKeys(srcA.getRegionInfo(),
- srcA.getStartKey(), srcB.getStartKey()) > 0)) { // A > B
+ } else if ((srcB.getStartKey() == null) ||
+ (HStoreKey.compareTwoRowKeys(srcA.getStartKey(), srcB.getStartKey()) > 0)) {
a = srcB;
b = srcA;
}
- if (!HStoreKey.equalsTwoRowKeys(srcA.getRegionInfo(),
- a.getEndKey(), b.getStartKey())) {
+ if (!HStoreKey.equalsTwoRowKeys(a.getEndKey(), b.getStartKey())) {
throw new IOException("Cannot merge non-adjacent regions");
}
return merge(a, b);
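The hunk above now orders the two source regions and tests adjacency on plain row-key bytes, since HStoreKey.compareTwoRowKeys and equalsTwoRowKeys no longer take an HRegionInfo. A minimal, self-contained sketch of that ordering-and-adjacency test follows; the byte[] helpers and the class name are illustrative stand-ins, not HBase API.

import java.util.Arrays;

/** Editorial sketch, not part of the patch. */
final class MergeAdjacencySketch {
  // Unsigned lexicographic row comparison, the spirit of compareTwoRowKeys(a, b).
  static int compareRows(byte[] a, byte[] b) {
    int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; i++) {
      int d = (a[i] & 0xff) - (b[i] & 0xff);
      if (d != 0) return d;
    }
    return a.length - b.length;
  }

  // Mergeable only if the lower region's end key equals the upper region's start key.
  static boolean adjacent(byte[] lowerEnd, byte[] upperStart) {
    return Arrays.equals(lowerEnd, upperStart);
  }

  public static void main(String[] args) {
    byte[] aStart = "d".getBytes(), aEnd = "m".getBytes();
    byte[] bStart = "m".getBytes();
    // Order so the region with the smaller start key is treated as 'a'.
    boolean aFirst = compareRows(aStart, bStart) <= 0;
    System.out.println("a first: " + aFirst + ", adjacent: " + adjacent(aEnd, bStart));
  }
}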
@@ -2468,20 +2445,17 @@ public class HRegion implements HConstants {
HTableDescriptor tabledesc = a.getTableDesc();
HLog log = a.getLog();
Path basedir = a.getBaseDir();
- final byte [] startKey = HStoreKey.equalsTwoRowKeys(a.getRegionInfo(),
- a.getStartKey(), EMPTY_BYTE_ARRAY) ||
- HStoreKey.equalsTwoRowKeys(a.getRegionInfo(),
- b.getStartKey(), EMPTY_BYTE_ARRAY) ? EMPTY_BYTE_ARRAY :
- HStoreKey.compareTwoRowKeys(a.getRegionInfo(), a.getStartKey(),
- b.getStartKey()) <= 0 ?
- a.getStartKey() : b.getStartKey();
- final byte [] endKey = HStoreKey.equalsTwoRowKeys(a.getRegionInfo(),
- a.getEndKey(), EMPTY_BYTE_ARRAY) ||
- HStoreKey.equalsTwoRowKeys(b.getRegionInfo(), b.getEndKey(),
- EMPTY_BYTE_ARRAY) ? EMPTY_BYTE_ARRAY :
- HStoreKey.compareTwoRowKeys(a.getRegionInfo(), a.getEndKey(),
- b.getEndKey()) <= 0 ?
- b.getEndKey() : a.getEndKey();
+ final byte [] startKey = HStoreKey.equalsTwoRowKeys(a.getStartKey(),
+ EMPTY_BYTE_ARRAY) ||
+ HStoreKey.equalsTwoRowKeys(b.getStartKey(), EMPTY_BYTE_ARRAY)?
+ EMPTY_BYTE_ARRAY: HStoreKey.compareTwoRowKeys(a.getStartKey(),
+ b.getStartKey()) <= 0?
+ a.getStartKey(): b.getStartKey();
+ final byte [] endKey = HStoreKey.equalsTwoRowKeys(a.getEndKey(),
+ EMPTY_BYTE_ARRAY) ||
+ HStoreKey.equalsTwoRowKeys(b.getEndKey(), EMPTY_BYTE_ARRAY)?
+ EMPTY_BYTE_ARRAY:
+ HStoreKey.compareTwoRowKeys(a.getEndKey(), b.getEndKey()) <= 0? b.getEndKey(): a.getEndKey();
HRegionInfo newRegionInfo = new HRegionInfo(tabledesc, startKey, endKey);
LOG.info("Creating new region " + newRegionInfo.toString());
@@ -2499,37 +2473,31 @@ public class HRegion implements HConstants {
// Move HStoreFiles under new region directory
- Map<byte [], List<HStoreFile>> byFamily =
- new TreeMap<byte [], List<HStoreFile>>(Bytes.BYTES_COMPARATOR);
+ Map<byte [], List<StoreFile>> byFamily =
+ new TreeMap<byte [], List<StoreFile>>(Bytes.BYTES_COMPARATOR);
byFamily = filesByFamily(byFamily, a.close());
byFamily = filesByFamily(byFamily, b.close());
- for (Map.Entry<byte [], List<HStoreFile>> es : byFamily.entrySet()) {
+ for (Map.Entry<byte [], List<StoreFile>> es : byFamily.entrySet()) {
byte [] colFamily = es.getKey();
- makeColumnFamilyDirs(fs, basedir, encodedName, colFamily, tabledesc);
+ makeColumnFamilyDirs(fs, basedir, newRegionInfo, colFamily);
// Because we compacted the source regions we should have no more than two
// HStoreFiles per family and there will be no reference store
- List<HStoreFile> srcFiles = es.getValue();
+ List<StoreFile> srcFiles = es.getValue();
if (srcFiles.size() == 2) {
- long seqA = srcFiles.get(0).loadInfo(fs);
- long seqB = srcFiles.get(1).loadInfo(fs);
+ long seqA = srcFiles.get(0).getMaxSequenceId();
+ long seqB = srcFiles.get(1).getMaxSequenceId();
if (seqA == seqB) {
- // We can't have duplicate sequence numbers
- if (LOG.isDebugEnabled()) {
- LOG.debug("Adjusting sequence id of storeFile " + srcFiles.get(1) +
- " down by one; sequence id A=" + seqA + ", sequence id B=" +
- seqB);
- }
- srcFiles.get(1).writeInfo(fs, seqB - 1);
+ // Can't have same sequenceid since on open of a store, this is what
+ // distinguishes the files (see the map of stores, how it's keyed by
+ // sequenceid).
+ throw new IOException("Files have same sequenceid");
}
}
- for (HStoreFile hsf: srcFiles) {
- HStoreFile dst = new HStoreFile(conf, fs, basedir,
- newRegionInfo, colFamily, -1, null);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Renaming " + hsf + " to " + dst);
- }
- hsf.rename(fs, dst);
+ for (StoreFile hsf: srcFiles) {
+ StoreFile.rename(fs, hsf.getPath(),
+ StoreFile.getUniqueFile(fs, Store.getStoreHomedir(basedir,
+ newRegionInfo.getEncodedName(), colFamily)));
}
}
if (LOG.isDebugEnabled()) {
@@ -2555,15 +2523,17 @@ public class HRegion implements HConstants {
* Fills a map with a vector of store files keyed by column family.
* @param byFamily Map to fill.
* @param storeFiles Store files to process.
+ * @param family
* @return Returns byFamily
*/
- private static Map<byte [], List<HStoreFile>> filesByFamily(
- Map<byte [], List<HStoreFile>> byFamily, List<HStoreFile> storeFiles) {
- for (HStoreFile src: storeFiles) {
- List<HStoreFile> v = byFamily.get(src.getColFamily());
+ private static Map<byte [], List<StoreFile>> filesByFamily(
+ Map<byte [], List<StoreFile>> byFamily, List<StoreFile> storeFiles) {
+ for (StoreFile src: storeFiles) {
+ byte [] family = src.getFamily();
+ List<StoreFile> v = byFamily.get(family);
if (v == null) {
- v = new ArrayList<HStoreFile>();
- byFamily.put(src.getColFamily(), v);
+ v = new ArrayList<StoreFile>();
+ byFamily.put(family, v);
}
v.add(src);
}
@@ -2582,7 +2552,7 @@ public class HRegion implements HConstants {
* @throws IOException
*/
boolean isMajorCompaction() throws IOException {
- for (HStore store: this.stores.values()) {
+ for (Store store: this.stores.values()) {
if (store.isMajorCompaction()) {
return true;
}
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index e5be042..fa632b3 100644
--- a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -647,7 +647,7 @@ public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErro
int storefileIndexSizeMB = 0;
synchronized (r.stores) {
stores += r.stores.size();
- for (HStore store: r.stores.values()) {
+ for (Store store: r.stores.values()) {
storefiles += store.getStorefilesCount();
storefileIndexSizeMB +=
(int)(store.getStorefilesIndexSize()/1024/1024);
@@ -955,8 +955,8 @@ public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErro
memcacheSize += r.memcacheSize.get();
synchronized (r.stores) {
stores += r.stores.size();
- for(Map.Entry<Integer, HStore> ee: r.stores.entrySet()) {
- HStore store = ee.getValue();
+ for(Map.Entry<Integer, Store> ee: r.stores.entrySet()) {
+ Store store = ee.getValue();
storefiles += store.getStorefilesCount();
try {
storefileIndexSize += store.getStorefilesIndexSize();
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HStore.java b/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
deleted file mode 100644
index a9e86b3..0000000
--- a/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ /dev/null
@@ -1,2207 +0,0 @@
-/**
- * Copyright 2007 The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.regionserver;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-import java.util.concurrent.CopyOnWriteArraySet;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HStoreKey;
-import org.apache.hadoop.hbase.RemoteExceptionHandler;
-import org.apache.hadoop.hbase.filter.RowFilterInterface;
-import org.apache.hadoop.hbase.io.BloomFilterMapFile;
-import org.apache.hadoop.hbase.io.Cell;
-import org.apache.hadoop.hbase.io.HBaseMapFile;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.io.Reference;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.io.MapFile;
-import org.apache.hadoop.hbase.io.SequenceFile;
-import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * HStore maintains a bunch of data files. It is responsible for maintaining
- * the memory/file hierarchy and for periodic flushes to disk and compacting
- * edits to the file.
- *
- * Locking and transactions are handled at a higher level. This API should not
- * be called directly by any writer, but rather by an HRegion manager.
- */
-public class HStore implements HConstants {
- static final Log LOG = LogFactory.getLog(HStore.class);
-
- /*
- * Regex that will work for straight filenames and for reference names.
- * If reference, then the regex has more than just one group. Group 1 is
- * this files id. Group 2 the referenced region name, etc.
- */
- private static final Pattern REF_NAME_PARSER =
- Pattern.compile("^(\\d+)(?:\\.(.+))?$");
-
- protected final Memcache memcache;
- private final Path basedir;
- private final HRegionInfo info;
- private final HColumnDescriptor family;
- private final SequenceFile.CompressionType compression;
- final FileSystem fs;
- private final HBaseConfiguration conf;
- // ttl in milliseconds.
- protected long ttl;
- private long majorCompactionTime;
- private int maxFilesToCompact;
- private final long desiredMaxFileSize;
- private volatile long storeSize;
-
- private final Integer flushLock = new Integer(0);
-
- final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
-
- final byte [] storeName;
- private final String storeNameStr;
-
- /*
- * Sorted Map of readers keyed by sequence id (Most recent should be last in
- * in list).
- */
- private final SortedMap<Long, HStoreFile> storefiles =
- Collections.synchronizedSortedMap(new TreeMap<Long, HStoreFile>());
-
- /*
- * Sorted Map of readers keyed by sequence id (Most recent is last in list).
- */
- private final SortedMap<Long, MapFile.Reader> readers =
- new TreeMap<Long, MapFile.Reader>();
-
- // The most-recent log-seq-ID that's present. The most-recent such ID means
- // we can ignore all log messages up to and including that ID (because they're
- // already reflected in the TreeMaps).
- private volatile long maxSeqId;
-
- private final Path compactionDir;
- private final Integer compactLock = new Integer(0);
- private final int compactionThreshold;
-
- // All access must be synchronized.
- private final CopyOnWriteArraySet<ChangedReadersObserver> changedReaderObservers =
- new CopyOnWriteArraySet<ChangedReadersObserver>();
-
- /**
- * An HStore is a set of zero or more MapFiles, which stretch backwards over
- * time. A given HStore is responsible for a certain set of columns for a
- * row in the HRegion.
- *
- * The HRegion starts writing to its set of HStores when the HRegion's
- * memcache is flushed. This results in a round of new MapFiles, one for
- * each HStore.
- *
- * <p>There's no reason to consider append-logging at this level; all logging
- * and locking is handled at the HRegion level. HStore just provides
- * services to manage sets of MapFiles. One of the most important of those
- * services is MapFile-compaction services.
- *
- * <p>The only thing having to do with logs that HStore needs to deal with is
- * the reconstructionLog. This is a segment of an HRegion's log that might
- * NOT be present upon startup. If the param is NULL, there's nothing to do.
- * If the param is non-NULL, we need to process the log to reconstruct
- * a TreeMap that might not have been written to disk before the process
- * died.
- *
- * <p>It's assumed that after this constructor returns, the reconstructionLog
- * file will be deleted (by whoever has instantiated the HStore).
- *
- * @param basedir qualified path under which the region directory lives
- * @param info HRegionInfo for this region
- * @param family HColumnDescriptor for this column
- * @param fs file system object
- * @param reconstructionLog existing log file to apply if any
- * @param conf configuration object
- * @param reporter Call on a period so hosting server can report we're
- * making progress to master -- otherwise master might think region deploy
- * failed. Can be null.
- * @throws IOException
- */
- protected HStore(Path basedir, HRegionInfo info, HColumnDescriptor family,
- FileSystem fs, Path reconstructionLog, HBaseConfiguration conf,
- final Progressable reporter)
- throws IOException {
- this.basedir = basedir;
- this.info = info;
- this.family = family;
- this.fs = fs;
- this.conf = conf;
- // getTimeToLive returns ttl in seconds. Convert to milliseconds.
- this.ttl = family.getTimeToLive();
- if (ttl != HConstants.FOREVER) {
- this.ttl *= 1000;
- }
- this.memcache = new Memcache(this.ttl, info);
- this.compactionDir = HRegion.getCompactionDir(basedir);
- this.storeName = Bytes.toBytes(this.info.getEncodedName() + "/" +
- Bytes.toString(this.family.getName()));
- this.storeNameStr = Bytes.toString(this.storeName);
-
- // By default, we compact if an HStore has more than
- // MIN_COMMITS_FOR_COMPACTION map files
- this.compactionThreshold =
- conf.getInt("hbase.hstore.compactionThreshold", 3);
-
- // By default we split region if a file > DEFAULT_MAX_FILE_SIZE.
- long maxFileSize = info.getTableDesc().getMaxFileSize();
- if (maxFileSize == HConstants.DEFAULT_MAX_FILE_SIZE) {
- maxFileSize = conf.getLong("hbase.hregion.max.filesize",
- HConstants.DEFAULT_MAX_FILE_SIZE);
- }
- this.desiredMaxFileSize = maxFileSize;
-
- this.majorCompactionTime =
- conf.getLong(HConstants.MAJOR_COMPACTION_PERIOD, 86400000);
- if (family.getValue(HConstants.MAJOR_COMPACTION_PERIOD) != null) {
- String strCompactionTime =
- family.getValue(HConstants.MAJOR_COMPACTION_PERIOD);
- this.majorCompactionTime = (new Long(strCompactionTime)).longValue();
- }
-
- this.maxFilesToCompact = conf.getInt("hbase.hstore.compaction.max", 10);
- this.storeSize = 0L;
-
- if (family.getCompression() == HColumnDescriptor.CompressionType.BLOCK) {
- this.compression = SequenceFile.CompressionType.BLOCK;
- } else if (family.getCompression() ==
- HColumnDescriptor.CompressionType.RECORD) {
- this.compression = SequenceFile.CompressionType.RECORD;
- } else {
- this.compression = SequenceFile.CompressionType.NONE;
- }
-
- Path mapdir = checkdir(HStoreFile.getMapDir(basedir, info.getEncodedName(),
- family.getName()));
- Path infodir = checkdir(HStoreFile.getInfoDir(basedir, info.getEncodedName(),
- family.getName()));
-
- // Go through the 'mapdir' and 'infodir' together, make sure that all
- // MapFiles are in a reliable state. Every entry in 'mapdir' must have a
- // corresponding one in 'loginfodir'. Without a corresponding log info
- // file, the entry in 'mapdir' must be deleted.
- // loadHStoreFiles also computes the max sequence id internally.
- this.maxSeqId = -1L;
- this.storefiles.putAll(loadHStoreFiles(infodir, mapdir));
- if (LOG.isDebugEnabled() && this.storefiles.size() > 0) {
- LOG.debug("Loaded " + this.storefiles.size() + " file(s) in hstore " +
- Bytes.toString(this.storeName) + ", max sequence id " + this.maxSeqId);
- }
-
- // Do reconstruction log.
- runReconstructionLog(reconstructionLog, this.maxSeqId, reporter);
-
- // Finally, start up all the map readers!
- setupReaders();
- }
-
- /*
- * Setup the mapfile readers for this store. There could be more than one
- * since we haven't compacted yet.
- * @throws IOException
- */
- private void setupReaders() throws IOException {
- boolean first = true;
- for(Map.Entry<Long, HStoreFile> e: this.storefiles.entrySet()) {
- MapFile.Reader r = null;
- if (first) {
- // Use a block cache (if configured) for the first reader only
- // so as to control memory usage.
- r = e.getValue().getReader(this.fs, this.family.isBloomfilter(),
- family.isBlockCacheEnabled());
- first = false;
- } else {
- r = e.getValue().getReader(this.fs, this.family.isBloomfilter(),
- false);
- }
- this.readers.put(e.getKey(), r);
- }
- }
-
- /*
- * @param dir If doesn't exist, create it.
- * @return Passed dir.
- * @throws IOException
- */
- private Path checkdir(final Path dir) throws IOException {
- if (!fs.exists(dir)) {
- fs.mkdirs(dir);
- }
- return dir;
- }
-
- HColumnDescriptor getFamily() {
- return this.family;
- }
-
- long getMaxSequenceId() {
- return this.maxSeqId;
- }
-
- /*
- * Run reconstuction log
- * @param reconstructionLog
- * @param msid
- * @param reporter
- * @throws IOException
- */
- private void runReconstructionLog(final Path reconstructionLog,
- final long msid, final Progressable reporter)
- throws IOException {
- try {
- doReconstructionLog(reconstructionLog, msid, reporter);
- } catch (EOFException e) {
- // Presume we got here because of lack of HADOOP-1700; for now keep going
- // but this is probably not what we want long term. If we got here there
- // has been data-loss
- LOG.warn("Exception processing reconstruction log " + reconstructionLog +
- " opening " + this.storeName +
- " -- continuing. Probably lack-of-HADOOP-1700 causing DATA LOSS!", e);
- } catch (IOException e) {
- // Presume we got here because of some HDFS issue. Don't just keep going.
- // Fail to open the HStore. Probably means we'll fail over and over
- // again until human intervention but alternative has us skipping logs
- // and losing edits: HBASE-642.
- LOG.warn("Exception processing reconstruction log " + reconstructionLog +
- " opening " + this.storeName, e);
- throw e;
- }
- }
-
- /*
- * Read the reconstructionLog to see whether we need to build a brand-new
- * MapFile out of non-flushed log entries.
- *
- * We can ignore any log message that has a sequence ID that's equal to or
- * lower than maxSeqID. (Because we know such log messages are already
- * reflected in the MapFiles.)
- */
- @SuppressWarnings("unchecked")
- private void doReconstructionLog(final Path reconstructionLog,
- final long maxSeqID, final Progressable reporter)
- throws UnsupportedEncodingException, IOException {
- if (reconstructionLog == null || !fs.exists(reconstructionLog)) {
- // Nothing to do.
- return;
- }
- // Check its not empty.
- FileStatus[] stats = fs.listStatus(reconstructionLog);
- if (stats == null || stats.length == 0) {
- LOG.warn("Passed reconstruction log " + reconstructionLog + " is zero-length");
- return;
- }
- long maxSeqIdInLog = -1;
- TreeMap<HStoreKey, byte []> reconstructedCache =
- new TreeMap<HStoreKey, byte []>(new HStoreKey.HStoreKeyWritableComparator(this.info));
-
- SequenceFile.Reader logReader = new SequenceFile.Reader(this.fs,
- reconstructionLog, this.conf);
-
- try {
- HLogKey key = new HLogKey();
- HLogEdit val = new HLogEdit();
- long skippedEdits = 0;
- long editsCount = 0;
- // How many edits to apply before we send a progress report.
- int reportInterval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
- while (logReader.next(key, val)) {
- maxSeqIdInLog = Math.max(maxSeqIdInLog, key.getLogSeqNum());
- if (key.getLogSeqNum() <= maxSeqID) {
- skippedEdits++;
- continue;
- }
- // Check this edit is for me. Also, guard against writing
- // METACOLUMN info such as HBASE::CACHEFLUSH entries
- byte [] column = val.getColumn();
- if (val.isTransactionEntry() || Bytes.equals(column, HLog.METACOLUMN)
- || !Bytes.equals(key.getRegionName(), info.getRegionName())
- || !HStoreKey.matchingFamily(family.getName(), column)) {
- continue;
- }
- HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp(),
- this.info);
- reconstructedCache.put(k, val.getVal());
- editsCount++;
- // Every 2k edits, tell the reporter we're making progress.
- // Have seen 60k edits taking 3minutes to complete.
- if (reporter != null && (editsCount % reportInterval) == 0) {
- reporter.progress();
- }
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +
- " because sequence id <= " + maxSeqID);
- }
- } finally {
- logReader.close();
- }
-
- if (reconstructedCache.size() > 0) {
- // We create a "virtual flush" at maxSeqIdInLog+1.
- if (LOG.isDebugEnabled()) {
- LOG.debug("flushing reconstructionCache");
- }
- internalFlushCache(reconstructedCache, maxSeqIdInLog + 1);
- }
- }
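doReconstructionLog above replays a log segment into an in-memory map, skipping every edit whose sequence id is already covered by the flushed files (seqId <= maxSeqID) while tracking the highest sequence id it sees. A minimal sketch of that filter-and-replay loop follows; the Edit record is a toy stand-in for HLogKey/HLogEdit, and all names are illustrative.

import java.util.List;
import java.util.TreeMap;

/** Editorial sketch, not part of the patch. */
final class LogReplaySketch {
  record Edit(long seqId, String key, String value) {}   // stand-in for HLogKey/HLogEdit

  /** Replays edits newer than maxSeqId; returns the reconstructed cache. */
  static TreeMap<String, String> replay(List<Edit> log, long maxSeqId) {
    long maxSeqIdInLog = -1;
    long skipped = 0;
    TreeMap<String, String> reconstructed = new TreeMap<>();
    for (Edit e : log) {
      maxSeqIdInLog = Math.max(maxSeqIdInLog, e.seqId());
      if (e.seqId() <= maxSeqId) {   // already reflected in a flushed file
        skipped++;
        continue;
      }
      reconstructed.put(e.key(), e.value());
    }
    System.out.println("applied=" + reconstructed.size() + " skipped=" + skipped
        + " maxSeqIdInLog=" + maxSeqIdInLog);
    return reconstructed;
  }
}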
-
- /*
- * Creates a series of HStoreFiles loaded from the given directory.
- * There must be a matching 'mapdir' and 'loginfo' pair of files.
- * If only one exists, we'll delete it. Does other consistency tests
- * checking files are not zero, etc.
- *
- * @param infodir qualified path for info file directory
- * @param mapdir qualified path for map file directory
- * @throws IOException
- */
- private SortedMap<Long, HStoreFile> loadHStoreFiles(Path infodir, Path mapdir)
- throws IOException {
- // Look first at info files. If a reference, these contain info we need
- // to create the HStoreFile.
- FileStatus infofiles[] = fs.listStatus(infodir);
- SortedMap<Long, HStoreFile> results = new TreeMap<Long, HStoreFile>();
- ArrayList<Path> mapfiles = new ArrayList<Path>(infofiles.length);
- for (int i = 0; i < infofiles.length; i++) {
- Path p = infofiles[i].getPath();
- // Check for empty info file. Should never be the case but can happen
- // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
- if (this.fs.getFileStatus(p).getLen() <= 0) {
- LOG.warn("Skipping " + p + " because its empty. DATA LOSS? Can " +
- "this scenario be repaired? HBASE-646");
- continue;
- }
-
- Matcher m = REF_NAME_PARSER.matcher(p.getName());
- /*
- * * * * * N O T E * * * * *
- *
- * We call isReference(Path, Matcher) here because it calls
- * Matcher.matches() which must be called before Matcher.group(int)
- * and we don't want to call Matcher.matches() twice.
- *
- * * * * * N O T E * * * * *
- */
- boolean isReference = isReference(p, m);
- long fid = Long.parseLong(m.group(1));
-
- HStoreFile curfile = null;
- Reference reference = null;
- if (isReference) {
- reference = HStoreFile.readSplitInfo(p, fs);
- }
- curfile = new HStoreFile(conf, fs, basedir, this.info,
- family.getName(), fid, reference);
- long storeSeqId = -1;
- try {
- storeSeqId = curfile.loadInfo(fs);
- if (storeSeqId > this.maxSeqId) {
- this.maxSeqId = storeSeqId;
- }
- } catch (IOException e) {
- // If the HSTORE_LOGINFOFILE doesn't contain a number, just ignore it.
- // That means it was built prior to the previous run of HStore, and so
- // it cannot contain any updates also contained in the log.
- LOG.info("HSTORE_LOGINFOFILE " + curfile +
- " does not contain a sequence number - ignoring");
- }
- Path mapfile = curfile.getMapFilePath();
- if (!fs.exists(mapfile)) {
- fs.delete(curfile.getInfoFilePath(), false);
- LOG.warn("Mapfile " + mapfile.toString() + " does not exist. " +
- "Cleaned up info file. Continuing...Probable DATA LOSS!!!");
- continue;
- }
- // References don't have data or index components under mapfile.
- if (!isReference && isEmptyDataFile(mapfile)) {
- curfile.delete();
- // We can have empty data file if data loss in hdfs.
- LOG.warn("Mapfile " + mapfile.toString() + " has empty data. " +
- "Deleting. Continuing...Probable DATA LOSS!!! See HBASE-646.");
- continue;
- }
- if (!isReference && isEmptyIndexFile(mapfile)) {
- try {
- // Try fixing this file.. if we can. Use the hbase version of fix.
- // Need to remove the old index file first else fix won't go ahead.
- this.fs.delete(new Path(mapfile, MapFile.INDEX_FILE_NAME), false);
- // TODO: This is going to fail if we are to rebuild a file from
- // meta because it won't have right comparator: HBASE-848.
- long count = MapFile.fix(this.fs, mapfile, HStoreKey.class,
- HBaseMapFile.VALUE_CLASS, false, this.conf);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Fixed index on " + mapfile.toString() + "; had " +
- count + " entries");
- }
- } catch (Exception e) {
- LOG.warn("Failed fix of " + mapfile.toString() +
- "...continuing; Probable DATA LOSS!!!", e);
- continue;
- }
- }
- long length = curfile.length();
- storeSize += length;
-
- // TODO: Confirm referent exists.
-
- // Found map and sympathetic info file. Add this hstorefile to result.
- if (LOG.isDebugEnabled()) {
- LOG.debug("loaded " + FSUtils.getPath(p) + ", isReference=" +
- isReference + ", sequence id=" + storeSeqId +
- ", length=" + length + ", majorCompaction=" +
- curfile.isMajorCompaction());
- }
- results.put(Long.valueOf(storeSeqId), curfile);
- // Keep list of sympathetic data mapfiles for cleaning info dir in next
- // section. Make sure path is fully qualified for compare.
- mapfiles.add(this.fs.makeQualified(mapfile));
- }
- cleanDataFiles(mapfiles, mapdir);
- return results;
- }
-
- /*
- * If no info file delete the sympathetic data file.
- * @param mapfiles List of mapfiles.
- * @param mapdir Directory to check.
- * @throws IOException
- */
- private void cleanDataFiles(final List<Path> mapfiles, final Path mapdir)
- throws IOException {
- // List paths by experience returns fully qualified names -- at least when
- // running on a mini hdfs cluster.
- FileStatus [] datfiles = fs.listStatus(mapdir);
- for (int i = 0; i < datfiles.length; i++) {
- Path p = datfiles[i].getPath();
- // If does not have sympathetic info file, delete.
- Path qualifiedP = fs.makeQualified(p);
- if (!mapfiles.contains(qualifiedP)) {
- fs.delete(p, true);
- }
- }
- }
-
- /*
- * @param mapfile
- * @return True if the passed mapfile has a zero-length data component (its
- * broken).
- * @throws IOException
- */
- private boolean isEmptyDataFile(final Path mapfile)
- throws IOException {
- // Mapfiles are made of 'data' and 'index' files. Confirm 'data' is
- // non-null if it exists (may not have been written to yet).
- return isEmptyFile(new Path(mapfile, MapFile.DATA_FILE_NAME));
- }
-
- /*
- * @param mapfile
- * @return True if the passed mapfile has a zero-length index component (its
- * broken).
- * @throws IOException
- */
- private boolean isEmptyIndexFile(final Path mapfile)
- throws IOException {
- // Mapfiles are made of 'data' and 'index' files. Confirm 'data' is
- // non-null if it exists (may not have been written to yet).
- return isEmptyFile(new Path(mapfile, MapFile.INDEX_FILE_NAME));
- }
-
- /*
- * @param f
- * @return True if the passed file does not exist or is zero-length (its
- * broken).
- * @throws IOException
- */
- private boolean isEmptyFile(final Path f)
- throws IOException {
- return !this.fs.exists(f) || this.fs.getFileStatus(f).getLen() == 0;
- }
-
- /**
- * Adds a value to the memcache
- *
- * @param key
- * @param value
- * @return memcache size delta
- */
- protected long add(HStoreKey key, byte[] value) {
- lock.readLock().lock();
- try {
- return this.memcache.add(key, value);
- } finally {
- lock.readLock().unlock();
- }
- }
-
- /**
- * Close all the MapFile readers
- *
- * We don't need to worry about subsequent requests because the HRegion holds
- * a write lock that will prevent any more reads or writes.
- *
- * @throws IOException
- */
- List<HStoreFile> close() throws IOException {
- ArrayList<HStoreFile> result = null;
- this.lock.writeLock().lock();
- try {
- for (MapFile.Reader reader: this.readers.values()) {
- reader.close();
- }
- synchronized (this.storefiles) {
- result = new ArrayList<HStoreFile>(storefiles.values());
- }
- LOG.debug("closed " + this.storeNameStr);
- return result;
- } finally {
- this.lock.writeLock().unlock();
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////
- // Flush changes to disk
- //////////////////////////////////////////////////////////////////////////////
-
- /**
- * Snapshot this stores memcache. Call before running
- * {@link #flushCache(long)} so it has some work to do.
- */
- void snapshot() {
- this.memcache.snapshot();
- }
-
- /**
- * Write out current snapshot. Presumes {@link #snapshot()} has been called
- * previously.
- * @param logCacheFlushId flush sequence number
- * @return true if a compaction is needed
- * @throws IOException
- */
- boolean flushCache(final long logCacheFlushId) throws IOException {
- // Get the snapshot to flush. Presumes that a call to
- // this.memcache.snapshot() has happened earlier up in the chain.
- SortedMap<HStoreKey, byte []> cache = this.memcache.getSnapshot();
- boolean compactionNeeded = internalFlushCache(cache, logCacheFlushId);
- // If an exception happens flushing, we let it out without clearing
- // the memcache snapshot. The old snapshot will be returned when we say
- // 'snapshot', the next time flush comes around.
- this.memcache.clearSnapshot(cache);
- return compactionNeeded;
- }
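flushCache above relies on a two-step protocol: the caller snapshots the memcache first, the snapshot is then written out, and only after a successful write is the snapshot cleared, so a failed flush can be retried against the same snapshot. A hedged sketch of that control flow over a toy in-memory map follows; the class and method names are illustrative, not the HBase Memcache API.

import java.util.SortedMap;
import java.util.TreeMap;

/** Editorial sketch, not part of the patch. */
final class SnapshotFlushSketch {
  private TreeMap<String, String> active = new TreeMap<>();
  private TreeMap<String, String> snapshot = new TreeMap<>();

  synchronized void snapshot() {
    if (snapshot.isEmpty()) {          // keep an old, un-flushed snapshot if present
      snapshot = active;
      active = new TreeMap<>();
    }
  }

  synchronized boolean flush(java.util.function.Consumer<SortedMap<String, String>> writer) {
    SortedMap<String, String> toFlush = snapshot;
    if (toFlush.isEmpty()) return false;
    writer.accept(toFlush);            // may throw; the snapshot is kept for retry
    snapshot = new TreeMap<>();        // cleared only after a successful write
    return true;
  }
}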
-
- private boolean internalFlushCache(final SortedMap<HStoreKey, byte []> cache,
- final long logCacheFlushId)
- throws IOException {
- long flushed = 0;
- // Don't flush if there are no entries.
- if (cache.size() == 0) {
- return false;
- }
-
- // TODO: We can fail in the below block before we complete adding this
- // flush to list of store files. Add cleanup of anything put on filesystem
- // if we fail.
- synchronized(flushLock) {
- long now = System.currentTimeMillis();
- // A. Write the Maps out to the disk
- HStoreFile flushedFile = new HStoreFile(conf, fs, basedir,
- this.info, family.getName(), -1L, null);
- MapFile.Writer out = flushedFile.getWriter(this.fs, this.compression,
- this.family.isBloomfilter(), cache.size());
- setIndexInterval(out);
-
- // Here we tried picking up an existing HStoreFile from disk and
- // interlacing the memcache flush compacting as we go. The notion was
- // that interlacing would take as long as a pure flush with the added
- // benefit of having one less file in the store. Experiments showed that
- // it takes two to three times the amount of time flushing -- more column
- // families makes it so the two timings come closer together -- but it
- // also complicates the flush. The code was removed. Needed work picking
- // which file to interlace (favor references first, etc.)
- //
- // Related, looks like 'merging compactions' in BigTable paper interlaces
- // a memcache flush. We don't.
- int entries = 0;
- try {
- for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) {
- HStoreKey curkey = es.getKey();
- byte[] bytes = es.getValue();
- if (HStoreKey.matchingFamily(this.family.getName(), curkey.getColumn())) {
- if (!isExpired(curkey, ttl, now)) {
- entries++;
- out.append(curkey, new ImmutableBytesWritable(bytes));
- flushed += this.memcache.heapSize(curkey, bytes, null);
- }
- }
- }
- } finally {
- out.close();
- }
- long newStoreSize = flushedFile.length();
- storeSize += newStoreSize;
-
- // B. Write out the log sequence number that corresponds to this output
- // MapFile. The MapFile is current up to and including the log seq num.
- flushedFile.writeInfo(fs, logCacheFlushId);
-
- // C. Finally, make the new MapFile available.
- updateReaders(logCacheFlushId, flushedFile);
- if(LOG.isDebugEnabled()) {
- LOG.debug("Added " + FSUtils.getPath(flushedFile.getMapFilePath()) +
- " with " + entries +
- " entries, sequence id " + logCacheFlushId + ", data size ~" +
- StringUtils.humanReadableInt(flushed) + ", file size " +
- StringUtils.humanReadableInt(newStoreSize) + " to " +
- this.info.getRegionNameAsString());
- }
- }
- return storefiles.size() >= compactionThreshold;
- }
-
- /*
- * Change readers adding into place the Reader produced by this new flush.
- * @param logCacheFlushId
- * @param flushedFile
- * @throws IOException
- */
- private void updateReaders(final long logCacheFlushId,
- final HStoreFile flushedFile)
- throws IOException {
- this.lock.writeLock().lock();
- try {
- Long flushid = Long.valueOf(logCacheFlushId);
- // Open the map file reader.
- this.readers.put(flushid,
- flushedFile.getReader(this.fs, this.family.isBloomfilter(),
- this.family.isBlockCacheEnabled()));
- this.storefiles.put(flushid, flushedFile);
- // Tell listeners of the change in readers.
- notifyChangedReadersObservers();
- } finally {
- this.lock.writeLock().unlock();
- }
- }
-
- /*
- * Notify all observers that set of Readers has changed.
- * @throws IOException
- */
- private void notifyChangedReadersObservers() throws IOException {
- for (ChangedReadersObserver o: this.changedReaderObservers) {
- o.updateReaders();
- }
- }
-
- /*
- * @param o Observer who wants to know about changes in set of Readers
- */
- void addChangedReaderObserver(ChangedReadersObserver o) {
- this.changedReaderObservers.add(o);
- }
-
- /*
- * @param o Observer no longer interested in changes in set of Readers.
- */
- void deleteChangedReaderObserver(ChangedReadersObserver o) {
- if (!this.changedReaderObservers.remove(o)) {
- LOG.warn("Not in set" + o);
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////
- // Compaction
- //////////////////////////////////////////////////////////////////////////////
-
- /*
- * @param files
- * @return True if any of the files in files are References.
- */
- private boolean hasReferences(Collection<HStoreFile> files) {
- if (files != null && files.size() > 0) {
- for (HStoreFile hsf: files) {
- if (hsf.isReference()) {
- return true;
- }
- }
- }
- return false;
- }
-
- /*
- * Gets lowest timestamp from files in a dir
- *
- * @param fs
- * @param dir
- * @throws IOException
- */
- private static long getLowestTimestamp(FileSystem fs, Path dir)
- throws IOException {
- FileStatus[] stats = fs.listStatus(dir);
- if (stats == null || stats.length == 0) {
- return 0l;
- }
- long lowTimestamp = Long.MAX_VALUE;
- for (int i = 0; i < stats.length; i++) {
- long timestamp = stats[i].getModificationTime();
- if (timestamp < lowTimestamp){
- lowTimestamp = timestamp;
- }
- }
- return lowTimestamp;
- }
-
- /**
- * Compact the back-HStores. This method may take some time, so the calling
- * thread must be able to block for long periods.
- *
- * During this time, the HStore can work as usual, getting values from
- * MapFiles and writing new MapFiles from the Memcache.
- *
- * Existing MapFiles are not destroyed until the new compacted TreeMap is
- * completely written-out to disk.
- *
- * The compactLock prevents multiple simultaneous compactions.
- * The structureLock prevents us from interfering with other write operations.
- *
- * We don't want to hold the structureLock for the whole time, as a compact()
- * can be lengthy and we want to allow cache-flushes during this period.
- *
- * @param majorCompaction True to force a major compaction regardless of
- * thresholds
- * @return mid key if a split is needed, null otherwise
- * @throws IOException
- */
- StoreSize compact(final boolean majorCompaction) throws IOException {
- boolean forceSplit = this.info.shouldSplit(false);
- boolean doMajorCompaction = majorCompaction;
- synchronized (compactLock) {
- long maxId = -1;
- List<HStoreFile> filesToCompact = null;
- synchronized (storefiles) {
- if (this.storefiles.size() <= 0) {
- LOG.debug(this.storeNameStr + ": no store files to compact");
- return null;
- }
- // filesToCompact are sorted oldest to newest.
- filesToCompact = new ArrayList<HStoreFile>(this.storefiles.values());
-
- // The max-sequenceID in any of the to-be-compacted TreeMaps is the
- // last key of storefiles.
- maxId = this.storefiles.lastKey().longValue();
- }
- // Check to see if we need to do a major compaction on this region.
- // If so, change doMajorCompaction to true to skip the incremental
- // compacting below. Only check if doMajorCompaction is not true.
- if (!doMajorCompaction) {
- doMajorCompaction = isMajorCompaction(filesToCompact);
- }
- boolean references = hasReferences(filesToCompact);
- if (!doMajorCompaction && !references &&
- (forceSplit || (filesToCompact.size() < compactionThreshold))) {
- return checkSplit(forceSplit);
- }
- if (!fs.exists(compactionDir) && !fs.mkdirs(compactionDir)) {
- LOG.warn("Mkdir on " + compactionDir.toString() + " failed");
- return checkSplit(forceSplit);
- }
-
- // HBASE-745, preparing all store file sizes for incremental compacting
- // selection.
- int countOfFiles = filesToCompact.size();
- long totalSize = 0;
- long[] fileSizes = new long[countOfFiles];
- long skipped = 0;
- int point = 0;
- for (int i = 0; i < countOfFiles; i++) {
- HStoreFile file = filesToCompact.get(i);
- Path path = file.getMapFilePath();
- if (path == null) {
- LOG.warn("Path is null for " + file);
- return null;
- }
- int len = 0;
- // listStatus can come back null.
- FileStatus [] fss = this.fs.listStatus(path);
- for (int ii = 0; fss != null && ii < fss.length; ii++) {
- len += fss[ii].getLen();
- }
- fileSizes[i] = len;
- totalSize += len;
- }
- if (!doMajorCompaction && !references) {
- // Here we select files for incremental compaction.
- // The rule is: if the largest(oldest) one is more than twice the
- // size of the second, skip the largest, and continue to next...,
- // until we meet the compactionThreshold limit.
- for (point = 0; point < countOfFiles - 1; point++) {
- if ((fileSizes[point] < fileSizes[point + 1] * 2) &&
- (countOfFiles - point) <= maxFilesToCompact) {
- break;
- }
- skipped += fileSizes[point];
- }
- filesToCompact = new ArrayList<HStoreFile>(filesToCompact.subList(point,
- countOfFiles));
- if (filesToCompact.size() <= 1) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Skipped compaction of 1 file; compaction size of " +
- this.storeNameStr + ": " +
- StringUtils.humanReadableInt(totalSize) + "; Skipped " + point +
- " files, size: " + skipped);
- }
- return checkSplit(forceSplit);
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("Compaction size of " + this.storeNameStr + ": " +
- StringUtils.humanReadableInt(totalSize) + "; Skipped " + point +
- " file(s), size: " + skipped);
- }
- }
-
- /*
- * We create a new list of MapFile.Reader objects so we don't screw up
- * the caching associated with the currently-loaded ones. Our iteration-
- * based access pattern is practically designed to ruin the cache.
- */
- List<MapFile.Reader> rdrs = new ArrayList<MapFile.Reader>();
- int nrows = createReaders(rdrs, filesToCompact);
-
- // Step through them, writing to the brand-new MapFile
- HStoreFile compactedOutputFile = new HStoreFile(conf, fs,
- this.compactionDir, this.info, family.getName(), -1L, null);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Started compaction of " + rdrs.size() + " file(s)" +
- (references? ", hasReferences=true,": " ") + " into " +
- FSUtils.getPath(compactedOutputFile.getMapFilePath()));
- }
- MapFile.Writer writer = compactedOutputFile.getWriter(this.fs,
- this.compression, this.family.isBloomfilter(), nrows);
- setIndexInterval(writer);
- try {
- compact(writer, rdrs, doMajorCompaction);
- } finally {
- writer.close();
- }
-
- // Now, write out an HSTORE_LOGINFOFILE for the brand-new TreeMap.
- compactedOutputFile.writeInfo(fs, maxId, doMajorCompaction);
-
- // Move the compaction into place.
- completeCompaction(filesToCompact, compactedOutputFile);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Completed " + (doMajorCompaction? "major": "") +
- " compaction of " + this.storeNameStr +
- " store size is " + StringUtils.humanReadableInt(storeSize));
- }
- }
- return checkSplit(forceSplit);
- }
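The incremental-selection rule above (HBASE-745) walks the files oldest to newest and keeps skipping the head of the list while it is at least twice the size of the next file, also honoring the maxFilesToCompact bound; whatever remains is compacted. A self-contained sketch of just that selection step follows; the class and method names are illustrative, not HBase API.

import java.util.Arrays;

/** Editorial sketch, not part of the patch. */
final class CompactionSelectionSketch {
  /** Returns the index of the first (oldest) file to include in the compaction.
   *  sizes[] is ordered oldest to newest, matching the store-file list above. */
  static int selectStart(long[] sizes, int maxFilesToCompact) {
    int point = 0;
    for (; point < sizes.length - 1; point++) {
      boolean notTooBig = sizes[point] < sizes[point + 1] * 2;
      boolean fitsLimit = (sizes.length - point) <= maxFilesToCompact;
      if (notTooBig && fitsLimit) {
        break;                     // stop skipping; compact files[point..]
      }
    }
    return point;
  }

  public static void main(String[] args) {
    long[] sizes = {900, 400, 100, 90, 80};   // oldest first
    int start = selectStart(sizes, 10);
    System.out.println("compact " + Arrays.toString(
        Arrays.copyOfRange(sizes, start, sizes.length)));
  }
}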
-
- /*
- * Set the index interval for the mapfile. There are two sources for
- * configuration information: the HCD, and the global HBase config.
- * If a source returns the default value, it is ignored. Otherwise,
- * the smallest non-default value is preferred.
- */
- private void setIndexInterval(MapFile.Writer writer) {
- int familyInterval = this.family.getMapFileIndexInterval();
- int interval = this.conf.getInt("hbase.io.index.interval",
- HColumnDescriptor.DEFAULT_MAPFILE_INDEX_INTERVAL);
- if (familyInterval != HColumnDescriptor.DEFAULT_MAPFILE_INDEX_INTERVAL) {
- if (interval != HColumnDescriptor.DEFAULT_MAPFILE_INDEX_INTERVAL) {
- if (familyInterval < interval)
- interval = familyInterval;
- } else {
- interval = familyInterval;
- }
- }
- writer.setIndexInterval(interval);
- }
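setIndexInterval above picks the MapFile index interval from two sources, the column-family descriptor and the global config, ignoring whichever is still at the default and preferring the smaller of two non-default values. The same decision in isolation follows as a sketch; the DEFAULT constant and class name are illustrative assumptions, not the actual HBase constants.

/** Editorial sketch, not part of the patch. */
final class IndexIntervalSketch {
  static final int DEFAULT = 128;   // illustrative stand-in for the HBase default

  static int chooseInterval(int familyInterval, int configInterval) {
    if (familyInterval == DEFAULT) return configInterval;   // family unset
    if (configInterval == DEFAULT) return familyInterval;   // config unset
    return Math.min(familyInterval, configInterval);        // both set: smaller wins
  }

  public static void main(String[] args) {
    System.out.println(chooseInterval(32, DEFAULT));  // 32
    System.out.println(chooseInterval(64, 16));       // 16
  }
}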
-
- /*
- * @return True if we should run a major compaction.
- */
- boolean isMajorCompaction() throws IOException {
- List<HStoreFile> filesToCompact = null;
- synchronized (storefiles) {
- // filesToCompact are sorted oldest to newest.
- filesToCompact = new ArrayList<HStoreFile>(this.storefiles.values());
- }
- return isMajorCompaction(filesToCompact);
- }
-
- /*
- * @param filesToCompact Files to compact. Can be null.
- * @return True if we should run a major compaction.
- */
- private boolean isMajorCompaction(final List<HStoreFile> filesToCompact)
- throws IOException {
- boolean result = false;
- Path mapdir = HStoreFile.getMapDir(this.basedir, this.info.getEncodedName(),
- this.family.getName());
- long lowTimestamp = getLowestTimestamp(fs, mapdir);
- long now = System.currentTimeMillis();
- if (lowTimestamp > 0l && lowTimestamp < (now - this.majorCompactionTime)) {
- // Major compaction time has elapsed.
- long elapsedTime = now - lowTimestamp;
- if (filesToCompact != null && filesToCompact.size() == 1 &&
- filesToCompact.get(0).isMajorCompaction() &&
- (this.ttl == HConstants.FOREVER || elapsedTime < this.ttl)) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Skipping major compaction of " + this.storeNameStr +
- " because one (major) compacted file only and elapsedTime " +
- elapsedTime + "ms is < ttl=" + this.ttl);
- }
- } else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Major compaction triggered on store " + this.storeNameStr +
- "; time since last major compaction " + (now - lowTimestamp) + "ms");
- }
- result = true;
- }
- }
- return result;
- }
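isMajorCompaction above boils down to: if the oldest file in the store is older than the configured major-compaction period, run a major compaction, unless the store already holds a single, previously major-compacted file whose data has not yet outlived the TTL. A hedged sketch of that predicate follows; the FOREVER constant and all names are illustrative assumptions.

/** Editorial sketch, not part of the patch. */
final class MajorCompactionCheckSketch {
  static final long FOREVER = Long.MAX_VALUE;   // illustrative stand-in for HConstants.FOREVER

  static boolean shouldMajorCompact(long lowestFileTimestamp, long now,
      long majorCompactionPeriod, int fileCount,
      boolean onlyFileIsMajorCompacted, long ttlMillis) {
    if (lowestFileTimestamp <= 0 ||
        lowestFileTimestamp >= now - majorCompactionPeriod) {
      return false;                       // nothing old enough yet
    }
    long elapsed = now - lowestFileTimestamp;
    // A single already-major-compacted file is left alone until TTL could
    // actually expire some of its cells.
    if (fileCount == 1 && onlyFileIsMajorCompacted &&
        (ttlMillis == FOREVER || elapsed < ttlMillis)) {
      return false;
    }
    return true;
  }
}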
-
- /*
- * Create readers for the passed in list of HStoreFiles and add them to
- * readers list. Used compacting.
- * @param readers Add Readers here.
- * @param files List of HSFs to make Readers for.
- * @return Count of rows for bloom filter sizing. Returns -1 if no bloom
- * filter wanted.
- */
- private int createReaders(final List<MapFile.Reader> rs,
- final List<HStoreFile> files)
- throws IOException {
- /* We create a new list of MapFile.Reader objects so we don't screw up
- * the caching associated with the currently-loaded ones. Our iteration-
- * based access pattern is practically designed to ruin the cache.
- */
- int nrows = -1;
- for (HStoreFile file: files) {
- try {
- // TODO: Readers are opened without block-cache enabled. Means we don't
- // get the prefetch that makes the read faster. But we don't want to
- // enable block-cache for these readers that are about to be closed.
- // The compaction of soon-to-be closed readers will probably force out
- // blocks that may be needed servicing real-time requests whereas
- // compaction runs in background. TODO: We know we're going to read
- // this file straight through. Leverage this fact. Use a big buffer
- // client side to speed things up or read it all up into memory one file
- // at a time or pull local and memory-map the file but leave the writer
- // up in hdfs?
- BloomFilterMapFile.Reader reader = file.getReader(fs, false, false);
- rs.add(reader);
- // Compute the size of the new bloomfilter if needed
- if (this.family.isBloomfilter()) {
- nrows += reader.getBloomFilterSize();
- }
- } catch (IOException e) {
- // Add info about which file threw exception. It may not be in the
- // exception message so output a message here where we know the
- // culprit.
- LOG.warn("Failed with " + e.toString() + ": " + file.toString());
- closeCompactionReaders(rs);
- throw e;
- }
- }
- return nrows;
- }
-
- /*
- * @param r List to reverse
- * @return A reversed array of content of readers
- */
- private MapFile.Reader [] reverse(final List<MapFile.Reader> r) {
- List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(r);
- Collections.reverse(copy);
- return copy.toArray(new MapFile.Reader[0]);
- }
-
- /*
- * @param rdrs List of readers
- * @param keys Current keys
- * @param done Which readers are done
- * @return The lowest current key in passed rdrs
- */
- private int getLowestKey(final MapFile.Reader [] rdrs,
- final HStoreKey [] keys, final boolean [] done) {
- int lowestKey = -1;
- for (int i = 0; i < rdrs.length; i++) {
- if (done[i]) {
- continue;
- }
- if (lowestKey < 0) {
- lowestKey = i;
- } else {
- if (keys[i].compareTo(keys[lowestKey]) < 0) {
- lowestKey = i;
- }
- }
- }
- return lowestKey;
- }
-
- /*
- * Compact a list of MapFile.Readers into MapFile.Writer.
- *
- * We work by iterating through the readers in parallel looking at newest
- * store file first. We always increment the lowest-ranked one. Updates to a
- * single row/column will appear ranked by timestamp.
- * @param compactedOut Where to write compaction.
- * @param pReaders List of readers sorted oldest to newest.
- * @param majorCompaction True to force a major compaction regardless of
- * thresholds
- * @throws IOException
- */
- private void compact(final MapFile.Writer compactedOut,
- final List<MapFile.Reader> pReaders, final boolean majorCompaction)
- throws IOException {
- // Reverse order so newest store file is first.
- MapFile.Reader[] rdrs = reverse(pReaders);
- try {
- HStoreKey [] keys = new HStoreKey[rdrs.length];
- ImmutableBytesWritable [] vals = new ImmutableBytesWritable[rdrs.length];
- boolean [] done = new boolean[rdrs.length];
- for(int i = 0; i < rdrs.length; i++) {
- keys[i] = new HStoreKey(HConstants.EMPTY_BYTE_ARRAY, this.info);
- vals[i] = new ImmutableBytesWritable();
- done[i] = false;
- }
-
- // Now, advance through the readers in order. This will have the
- // effect of a run-time sort of the entire dataset.
- int numDone = 0;
- for (int i = 0; i < rdrs.length; i++) {
- rdrs[i].reset();
- done[i] = !rdrs[i].next(keys[i], vals[i]);
- if (done[i]) {
- numDone++;
- }
- }
-
- long now = System.currentTimeMillis();
- int timesSeen = 0;
- HStoreKey lastSeen = new HStoreKey();
- HStoreKey lastDelete = null;
- while (numDone < done.length) {
- // Get lowest key in all store files.
- int lowestKey = getLowestKey(rdrs, keys, done);
- HStoreKey sk = keys[lowestKey];
- // If its same row and column as last key, increment times seen.
- if (HStoreKey.equalsTwoRowKeys(info, lastSeen.getRow(), sk.getRow())
- && Bytes.equals(lastSeen.getColumn(), sk.getColumn())) {
- timesSeen++;
- // Reset last delete if not exact timestamp -- lastDelete only stops
- // exactly the same key making it out to the compacted store file.
- if (lastDelete != null &&
- lastDelete.getTimestamp() != sk.getTimestamp()) {
- lastDelete = null;
- }
- } else {
- timesSeen = 1;
- lastDelete = null;
- }
-
- // Don't write empty rows or columns. Only remove cells on major
- // compaction. Remove if expired or > VERSIONS
- if (sk.getRow().length != 0 && sk.getColumn().length != 0) {
- ImmutableBytesWritable value = vals[lowestKey];
- if (!majorCompaction) {
- // Write out all values if not a major compaction.
- compactedOut.append(sk, value);
- } else {
- boolean expired = false;
- boolean deleted = false;
- if (timesSeen <= family.getMaxVersions() &&
- !(expired = isExpired(sk, ttl, now))) {
- // If this value key is same as a deleted key, skip
- if (lastDelete != null && sk.equals(lastDelete)) {
- deleted = true;
- } else if (HLogEdit.isDeleted(value.get())) {
- // If a deleted value, skip
- deleted = true;
- lastDelete = new HStoreKey(sk);
- } else {
- compactedOut.append(sk, vals[lowestKey]);
- }
- }
- if (expired || deleted) {
- // HBASE-855 remove one from timesSeen because it did not make it
- // past expired check -- don't count against max versions.
- timesSeen--;
- }
- }
- }
-
- // Update last-seen items
- lastSeen = new HStoreKey(sk);
-
- // Advance the smallest key. If that reader's all finished, then
- // mark it as done.
- if (!rdrs[lowestKey].next(keys[lowestKey], vals[lowestKey])) {
- done[lowestKey] = true;
- rdrs[lowestKey].close();
- rdrs[lowestKey] = null;
- numDone++;
- }
- }
- } finally {
- closeCompactionReaders(Arrays.asList(rdrs));
- }
- }
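The loop above is a k-way merge over the readers, newest store file first, with timesSeen dropping versions beyond the family maximum on a major compaction. Below is a minimal, self-contained sketch of that ranking idea; the Entry record and MergeSketch class are hypothetical stand-ins for HStoreKey/value pairs, not the HBase API.

import java.util.*;

public class MergeSketch {
  // Hypothetical stand-in for an HStoreKey: row/column plus timestamp.
  record Entry(String row, String column, long timestamp) {}

  public static void main(String[] args) {
    // Two "readers", newest store file first; each is sorted by row/column,
    // then by descending timestamp, as the MapFiles are.
    List<Deque<Entry>> readers = new ArrayList<>();
    readers.add(new ArrayDeque<>(List.of(new Entry("r1", "c1", 300), new Entry("r2", "c1", 250))));
    readers.add(new ArrayDeque<>(List.of(new Entry("r1", "c1", 100), new Entry("r1", "c1", 50))));

    int maxVersions = 2;          // analogue of family.getMaxVersions()
    int timesSeen = 0;
    Entry lastSeen = null;
    List<Entry> compacted = new ArrayList<>();

    while (readers.stream().anyMatch(d -> !d.isEmpty())) {
      // Pick the reader whose head sorts lowest (row, column, newest timestamp first).
      Deque<Entry> lowest = null;
      for (Deque<Entry> d : readers) {
        if (d.isEmpty()) continue;
        if (lowest == null || compare(d.peek(), lowest.peek()) < 0) lowest = d;
      }
      Entry e = lowest.poll();
      // Count how many versions of this row/column we have seen so far.
      timesSeen = (lastSeen != null && lastSeen.row().equals(e.row())
          && lastSeen.column().equals(e.column())) ? timesSeen + 1 : 1;
      if (timesSeen <= maxVersions) {
        compacted.add(e);         // keep; a real major compaction also drops deletes/expired cells
      }
      lastSeen = e;
    }
    System.out.println(compacted); // keeps ts=300 and ts=100 for r1/c1, drops ts=50
  }

  static int compare(Entry a, Entry b) {
    int c = a.row().compareTo(b.row());
    if (c != 0) return c;
    c = a.column().compareTo(b.column());
    if (c != 0) return c;
    return Long.compare(b.timestamp(), a.timestamp()); // newer first
  }
}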
-
- private void closeCompactionReaders(final List<MapFile.Reader> rdrs) {
- for (MapFile.Reader r: rdrs) {
- try {
- if (r != null) {
- r.close();
- }
- } catch (IOException e) {
- LOG.warn("Exception closing reader for " + this.storeNameStr, e);
- }
- }
- }
-
- /*
- * It's assumed that the compactLock will be acquired prior to calling this
- * method! Otherwise, it is not thread-safe!
- *
- * It works by processing a compaction that's been written to disk.
- *
- * It is usually invoked at the end of a compaction, but might also be
- * invoked at HStore startup, if the prior execution died midway through.
- *
- *
- * Moving the compacted TreeMap into place means:
- *
- * 1) Moving the new compacted MapFile into place
- * 2) Unload all replaced MapFiles, close and collect list to delete.
- * 3) Loading the new TreeMap.
- * 4) Compute new store size
- *
- *
- * @param compactedFiles list of files that were compacted
- * @param compactedFile HStoreFile that is the result of the compaction
- * @throws IOException
- */
- private void completeCompaction(final List<HStoreFile> compactedFiles,
- final HStoreFile compactedFile)
- throws IOException {
- this.lock.writeLock().lock();
- try {
- // 1. Moving the new MapFile into place.
- HStoreFile finalCompactedFile = new HStoreFile(conf, fs, basedir,
- this.info, family.getName(), -1, null,
- compactedFile.isMajorCompaction());
- if (LOG.isDebugEnabled()) {
- LOG.debug("moving " + FSUtils.getPath(compactedFile.getMapFilePath()) +
- " to " + FSUtils.getPath(finalCompactedFile.getMapFilePath()));
- }
- if (!compactedFile.rename(this.fs, finalCompactedFile)) {
- LOG.error("Failed move of compacted file " +
- finalCompactedFile.getMapFilePath().toString());
- return;
- }
-
- // 2. Unload all replaced MapFiles, close and collect list to delete.
- synchronized (storefiles) {
- Map<Long, HStoreFile> toDelete = new HashMap<Long, HStoreFile>();
- for (Map.Entry<Long, HStoreFile> e : this.storefiles.entrySet()) {
- if (!compactedFiles.contains(e.getValue())) {
- continue;
- }
- Long key = e.getKey();
- MapFile.Reader reader = this.readers.remove(key);
- if (reader != null) {
- reader.close();
- }
- toDelete.put(key, e.getValue());
- }
-
- try {
- // 3. Loading the new TreeMap.
- // Change this.storefiles so it reflects new state but do not
- // delete old store files until we have sent out notification of
- // change in case old files are still being accessed by outstanding
- // scanners.
- for (Long key : toDelete.keySet()) {
- this.storefiles.remove(key);
- }
- // Add new compacted Reader and store file.
- Long orderVal = Long.valueOf(finalCompactedFile.loadInfo(fs));
- this.readers.put(orderVal,
- // Use a block cache (if configured) for this reader since
- // it is the only one.
- finalCompactedFile.getReader(this.fs,
- this.family.isBloomfilter(),
- this.family.isBlockCacheEnabled()));
- this.storefiles.put(orderVal, finalCompactedFile);
- // Tell observers that list of Readers has changed.
- notifyChangedReadersObservers();
- // Finally, delete old store files.
- for (HStoreFile hsf : toDelete.values()) {
- hsf.delete();
- }
- } catch (IOException e) {
- e = RemoteExceptionHandler.checkIOException(e);
- LOG.error("Failed replacing compacted files for " +
- this.storeNameStr +
- ". Compacted file is " + finalCompactedFile.toString() +
- ". Files replaced are " + compactedFiles.toString() +
- " some of which may have been already removed", e);
- }
- // 4. Compute new store size
- storeSize = 0L;
- for (HStoreFile hsf : storefiles.values()) {
- storeSize += hsf.length();
- }
- }
- } finally {
- this.lock.writeLock().unlock();
- }
- }
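The ordering in completeCompaction matters: swap the file maps under the write lock and notify observers before deleting the replaced files, so outstanding scanners never dereference a missing store file. A small sketch of that swap-then-delete discipline with plain collections (SwapThenDelete and its members are illustrative names, not HBase classes):

import java.util.*;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class SwapThenDelete {
  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
  private final SortedMap<Long, String> storefiles = new TreeMap<>(); // id -> file name

  // Replace 'replaced' with the single 'compacted' file, returning what to delete last.
  List<String> complete(Collection<Long> replaced, long newId, String compacted) {
    lock.writeLock().lock();
    try {
      List<String> toDelete = new ArrayList<>();
      for (Long id : replaced) {
        String old = storefiles.remove(id);   // 2) unload replaced files
        if (old != null) toDelete.add(old);
      }
      storefiles.put(newId, compacted);       // 3) load the new file
      // 4) notify readers/observers of the changed file set here, BEFORE deleting,
      //    so in-flight scanners holding the old readers are not surprised.
      return toDelete;                        // caller deletes these only after notification
    } finally {
      lock.writeLock().unlock();
    }
  }

  public static void main(String[] args) {
    SwapThenDelete s = new SwapThenDelete();
    s.storefiles.put(1L, "f1");
    s.storefiles.put(2L, "f2");
    System.out.println(s.complete(List.of(1L, 2L), 3L, "compacted")); // [f1, f2]
  }
}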
-
- //////////////////////////////////////////////////////////////////////////////
- // Accessors.
- // (This is the only section that is directly useful!)
- //////////////////////////////////////////////////////////////////////////////
-
- /**
- * Return all the available columns for the given key. The key indicates a
- * row and timestamp, but not a column name.
- *
- * The returned object should map column names to Cells.
- */
- void getFull(HStoreKey key, final Set<byte []> columns,
- final int numVersions, Map<byte [], Cell> results)
- throws IOException {
- int versions = versionsToReturn(numVersions);
-
- Map<byte [], Long> deletes =
- new TreeMap<byte [], Long>(Bytes.BYTES_COMPARATOR);
-
- // if the key is null, we're not even looking for anything. return.
- if (key == null) {
- return;
- }
-
- this.lock.readLock().lock();
-
- // get from the memcache first.
- memcache.getFull(key, columns, versions, deletes, results);
-
- try {
- MapFile.Reader[] maparray = getReaders();
-
- // examine each mapfile
- for (int i = maparray.length - 1; i >= 0; i--) {
- MapFile.Reader map = maparray[i];
-
- // synchronize on the map so that no one else iterates it at the same
- // time
- getFullFromMapFile(map, key, columns, versions, deletes, results);
- }
- } finally {
- this.lock.readLock().unlock();
- }
- }
-
- private void getFullFromMapFile(MapFile.Reader map, HStoreKey key,
- Set<byte []> columns, int numVersions, Map<byte [], Long> deletes,
- Map<byte [], Cell> results)
- throws IOException {
- synchronized(map) {
- long now = System.currentTimeMillis();
-
- // seek back to the beginning
- map.reset();
-
- // seek to the closest key that should match the row we're looking for
- ImmutableBytesWritable readval = new ImmutableBytesWritable();
- HStoreKey readkey = (HStoreKey)map.getClosest(key, readval);
- if (readkey == null) {
- return;
- }
-
- do {
- byte [] readcol = readkey.getColumn();
-
- // if we're looking for this column (or all of them), and there isn't
- // already a value for this column in the results map or there is a value
- // but we haven't collected enough versions yet, and the key we
- // just read matches, then we'll consider it
- if ((columns == null || columns.contains(readcol))
- && (!results.containsKey(readcol)
- || results.get(readcol).getNumValues() < numVersions)
- && key.matchesWithoutColumn(readkey)) {
- // if the value of the cell we're looking at right now is a delete,
- // we need to treat it differently
- if(HLogEdit.isDeleted(readval.get())) {
- // if it's not already recorded as a delete or recorded with a more
- // recent delete timestamp, record it for later
- if (!deletes.containsKey(readcol)
- || deletes.get(readcol).longValue() < readkey.getTimestamp()) {
- deletes.put(readcol, Long.valueOf(readkey.getTimestamp()));
- }
- } else if (!(deletes.containsKey(readcol)
- && deletes.get(readcol).longValue() >= readkey.getTimestamp()) ) {
- // So the cell itself isn't a delete, but there may be a delete
- // pending from earlier in our search. Only record this result if
- // there aren't any pending deletes.
- if (!(deletes.containsKey(readcol) &&
- deletes.get(readcol).longValue() >= readkey.getTimestamp())) {
- if (!isExpired(readkey, ttl, now)) {
- if (!results.containsKey(readcol)) {
- results.put(readcol,
- new Cell(readval.get(), readkey.getTimestamp()));
- } else {
- results.get(readcol).add(readval.get(),
- readkey.getTimestamp());
- }
- // need to reinstantiate the readval so we can reuse it,
- // otherwise next iteration will destroy our result
- readval = new ImmutableBytesWritable();
- }
- }
- }
- } else if (HStoreKey.compareTwoRowKeys(info,key.getRow(), readkey.getRow()) < 0) {
- // if we've crossed into the next row, then we can just stop
- // iterating
- break;
- }
-
- } while(map.next(readkey, readval));
- }
- }
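The per-column delete bookkeeping above only returns a cell when no recorded delete has an equal or newer timestamp for that column. A compact sketch of that masking rule, assuming a single column scanned newest-to-oldest and plain Java types in place of HStoreKey/Cell:

import java.util.*;

public class ColumnDeleteMask {
  // Returns the timestamps that survive, given {timestamp, isDelete(0/1)} entries
  // scanned newest-to-oldest for one column, mirroring the bookkeeping above.
  static List<Long> survivors(List<long[]> scanned) {
    Long newestDelete = null;
    List<Long> out = new ArrayList<>();
    for (long[] cell : scanned) {
      long ts = cell[0];
      boolean isDelete = cell[1] == 1;
      if (isDelete) {
        if (newestDelete == null || newestDelete < ts) newestDelete = ts;
      } else if (newestDelete == null || newestDelete < ts) {
        out.add(ts); // no delete at or after this timestamp, so the cell is returned
      }
    }
    return out;
  }

  public static void main(String[] args) {
    // Cells at ts 300 (live), 200 (delete), 100 (live): the delete masks ts <= 200.
    System.out.println(survivors(List.of(
        new long[]{300, 0}, new long[]{200, 1}, new long[]{100, 0}))); // [300]
  }
}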
-
- /**
- * @return Array of readers ordered oldest to newest.
- */
- public MapFile.Reader [] getReaders() {
- return this.readers.values().
- toArray(new MapFile.Reader[this.readers.size()]);
- }
-
- /*
- * @param wantedVersions How many versions were asked for.
- * @return wantedVersions or this family's MAX_VERSIONS.
- */
- private int versionsToReturn(final int wantedVersions) {
- if (wantedVersions <= 0) {
- throw new IllegalArgumentException("Number of versions must be > 0");
- }
- // Make sure we do not return more than maximum versions for this store.
- return wantedVersions > this.family.getMaxVersions()?
- this.family.getMaxVersions(): wantedVersions;
- }
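versionsToReturn is just a clamp: reject non-positive requests and cap the rest at the family maximum. A trivial standalone equivalent, with the maximum passed in explicitly for illustration:

public class VersionsClamp {
  // Clamp the requested count to the family maximum; reject non-positive requests.
  static int versionsToReturn(int wanted, int maxVersions) {
    if (wanted <= 0) {
      throw new IllegalArgumentException("Number of versions must be > 0");
    }
    return Math.min(wanted, maxVersions);
  }

  public static void main(String[] args) {
    System.out.println(versionsToReturn(10, 3)); // 3 -- capped at the family maximum
    System.out.println(versionsToReturn(2, 3));  // 2 -- request honored as-is
  }
}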
-
- /**
- * Get the value for the indicated HStoreKey. Grab the target value and the
- * previous numVersions - 1 values, as well.
- *
- * Use {@link HConstants#ALL_VERSIONS} to retrieve all versions.
- * @param key
- * @param numVersions Number of versions to fetch. Must be > 0.
- * @return values for the specified versions
- * @throws IOException
- */
- Cell[] get(final HStoreKey key, final int numVersions) throws IOException {
- // This code below is very close to the body of the getKeys method. Any
- // changes in the flow below should also probably be done in getKeys.
- // TODO: Refactor so same code used.
- long now = System.currentTimeMillis();
- int versions = versionsToReturn(numVersions);
- // Keep a list of deleted cell keys. We need this because as we go through
- // the memcache and store files, the cell with the delete marker may be
- // in one store and the old non-delete cell value in a later store.
- // If we don't keep around the fact that the cell was deleted in a newer
- // record, we end up returning the old value if user is asking for more
- // than one version. This List of deletes should not be large since we
- // are only keeping rows and columns that match those set on the get and
- // which have delete values. If memory usage becomes an issue, could
- // redo as bloom filter.
- Set<HStoreKey> deletes = new HashSet<HStoreKey>();
- this.lock.readLock().lock();
- try {
- // Check the memcache
- List<Cell> results = this.memcache.get(key, versions, deletes, now);
- // If we got sufficient versions from memcache, return.
- if (results.size() == versions) {
- return results.toArray(new Cell[results.size()]);
- }
- MapFile.Reader[] maparray = getReaders();
- // Returned array is sorted with the most recent addition last.
- for(int i = maparray.length - 1;
- i >= 0 && !hasEnoughVersions(versions, results); i--) {
- MapFile.Reader r = maparray[i];
- synchronized (r) {
- // Do the priming read
- ImmutableBytesWritable readval = new ImmutableBytesWritable();
- HStoreKey readkey = (HStoreKey)r.getClosest(key, readval);
- if (readkey == null) {
- // map.getClosest returns null if the passed key is > than the
- // last key in the map file. getClosest is a bit of a misnomer
- // since it returns exact match or the next closest key AFTER not
- // BEFORE. We use getClosest because we're usually passed a
- // key that has a timestamp of maximum long to indicate we want
- // most recent update.
- continue;
- }
- if (!readkey.matchesRowCol(key)) {
- continue;
- }
- if (get(readkey, readval.get(), versions, results, deletes, now)) {
- break;
- }
- for (readval = new ImmutableBytesWritable();
- r.next(readkey, readval) && readkey.matchesRowCol(key);
- readval = new ImmutableBytesWritable()) {
- if (get(readkey, readval.get(), versions, results, deletes, now)) {
- break;
- }
- }
- }
- }
- return results.size() == 0 ?
- null : results.toArray(new Cell[results.size()]);
- } finally {
- this.lock.readLock().unlock();
- }
- }
-
- /*
- * Look at one key/value.
- * @param key
- * @param value
- * @param versions
- * @param results
- * @param deletes
- * @param now
- * @return True if we have enough versions.
- */
- private boolean get(final HStoreKey key, final byte [] value,
- final int versions, final List<Cell> results,
- final Set<HStoreKey> deletes, final long now) {
- if (!HLogEdit.isDeleted(value)) {
- if (notExpiredAndNotInDeletes(this.ttl, key, now, deletes)) {
- results.add(new Cell(value, key.getTimestamp()));
- }
- // Perhaps only one version is wanted. I could let this
- // test happen later in the for loop test but it would cost
- // the allocation of an ImmutableBytesWritable.
- if (hasEnoughVersions(versions, results)) {
- return true;
- }
- } else {
- // Is this copy necessary?
- deletes.add(new HStoreKey(key));
- }
- return false;
- }
-
- /*
- * Small method to check if we are over the max number of versions
- * or we achieved this family's max versions.
- * The latter happens when we have the situation described in HBASE-621.
- * @param versions
- * @param c
- * @return True if we have collected enough versions.
- */
- private boolean hasEnoughVersions(final int versions, final List<Cell> c) {
- return c.size() >= versions;
- }
-
- /**
- * Get versions of keys matching the origin key's
- * row/column/timestamp and those of an older vintage.
- * @param origin Where to start searching.
- * @param versions How many versions to return. Pass
- * {@link HConstants#ALL_VERSIONS} to retrieve all.
- * @param now
- * @param columnPattern Regex pattern for column matching. If columnPattern
- * is not null, we use it to match columns. The columnPattern is only applied
- * when origin's column is null or its length is zero.
- * @return Matching keys.
- * @throws IOException
- */
- public List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
- final long now, final Pattern columnPattern)
- throws IOException {
- // This code below is very close to the body of the get method. Any
- // changes in the flow below should also probably be done in get. TODO:
- // Refactor so same code used.
- Set<HStoreKey> deletes = new HashSet<HStoreKey>();
- this.lock.readLock().lock();
- try {
- // Check the memcache
- List<HStoreKey> keys =
- this.memcache.getKeys(origin, versions, deletes, now, columnPattern);
- // If we got sufficient versions from memcache, return.
- if (keys.size() >= versions) {
- return keys;
- }
- MapFile.Reader[] maparray = getReaders();
- // Returned array is sorted with the most recent addition last.
- for (int i = maparray.length - 1;
- i >= 0 && keys.size() < versions; i--) {
- MapFile.Reader map = maparray[i];
- synchronized(map) {
- map.reset();
- // Do the priming read
- ImmutableBytesWritable readval = new ImmutableBytesWritable();
- HStoreKey readkey = (HStoreKey)map.getClosest(origin, readval);
- if (readkey == null) {
- // map.getClosest returns null if the passed key is > than the
- // last key in the map file. getClosest is a bit of a misnomer
- // since it returns exact match or the next closest key AFTER not
- // BEFORE.
- continue;
- }
- do {
- // if the row matches, we might want this one.
- if (rowMatches(origin, readkey)) {
- // if the column pattern is not null, we use it for column matching.
- // we will skip the keys whose column doesn't match the pattern.
- if (columnPattern != null) {
- if (!(columnPattern.matcher(Bytes.toString(readkey.getColumn())).matches())) {
- continue;
- }
- }
- // if the cell address matches, then we definitely want this key.
- if (cellMatches(origin, readkey)) {
- // Store key if it isn't deleted or superseded by memcache
- if (!HLogEdit.isDeleted(readval.get())) {
- if (notExpiredAndNotInDeletes(this.ttl, readkey, now, deletes)) {
- keys.add(new HStoreKey(readkey));
- }
- if (keys.size() >= versions) {
- break;
- }
- } else {
- deletes.add(new HStoreKey(readkey));
- }
- } else {
- // the cell doesn't match, but there might be more with different
- // timestamps, so move to the next key
- continue;
- }
- } else {
- // the row doesn't match, so we've gone too far.
- break;
- }
- } while (map.next(readkey, readval)); // advance to the next key
- }
- }
- return keys;
- } finally {
- this.lock.readLock().unlock();
- }
- }
-
- /**
- * Find the key that matches row exactly, or the one that immediately
- * precedes it. WARNING: Only use this method on a table where writes occur
- * with strictly increasing timestamps. This method assumes this pattern of
- * writes in order to make it reasonably performant.
- * @param row
- * @return Found row
- * @throws IOException
- */
- @SuppressWarnings("unchecked")
- byte [] getRowKeyAtOrBefore(final byte [] row)
- throws IOException{
- // Map of HStoreKeys that are candidates for holding the row key that
- // most closely matches what we're looking for. We'll have to update it as
- // deletes are found all over the place as we go along before finally
- // reading the best key out of it at the end.
- SortedMap<HStoreKey, Long> candidateKeys = new TreeMap<HStoreKey, Long>(
- new HStoreKey.HStoreKeyWritableComparator(info));
-
- // Keep a list of deleted cell keys. We need this because as we go through
- // the store files, the cell with the delete marker may be in one file and
- // the old non-delete cell value in a later store file. If we don't keep
- // around the fact that the cell was deleted in a newer record, we end up
- // returning the old value if user is asking for more than one version.
- // This List of deletes should not be large since we are only keeping rows
- // and columns that match those set on the scanner and which have delete
- // values. If memory usage becomes an issue, could redo as bloom filter.
- Set<HStoreKey> deletes = new HashSet<HStoreKey>();
- this.lock.readLock().lock();
- try {
- // First go to the memcache. Pick up deletes and candidates.
- this.memcache.getRowKeyAtOrBefore(row, candidateKeys, deletes);
-
- // Process each store file. Run through from newest to oldest.
- // This code below is very close to the body of the getKeys method.
- MapFile.Reader[] maparray = getReaders();
- for (int i = maparray.length - 1; i >= 0; i--) {
- // Update the candidate keys from the current map file
- rowAtOrBeforeFromMapFile(maparray[i], row, candidateKeys, deletes);
- }
- // Return the best key from candidateKeys
- byte [] result =
- candidateKeys.isEmpty()? null: candidateKeys.lastKey().getRow();
- return result;
- } finally {
- this.lock.readLock().unlock();
- }
- }
-
- /*
- * Check an individual MapFile for the row at or before a given key
- * and timestamp
- * @param map
- * @param row
- * @param candidateKeys
- * @throws IOException
- */
- private void rowAtOrBeforeFromMapFile(final MapFile.Reader map,
- final byte [] row, final SortedMap<HStoreKey, Long> candidateKeys,
- final Set<HStoreKey> deletes)
- throws IOException {
- HStoreKey startKey = new HStoreKey();
- ImmutableBytesWritable startValue = new ImmutableBytesWritable();
- synchronized(map) {
- // Don't bother with the rest of this if the file is empty
- map.reset();
- if (!map.next(startKey, startValue)) {
- return;
- }
- startKey.setHRegionInfo(this.info);
- // If start row for this file is beyond passed in row, return; nothing
- // in here is of use to us.
- if (HStoreKey.compareTwoRowKeys(this.info, startKey.getRow(), row) > 0) {
- return;
- }
- long now = System.currentTimeMillis();
- // if there aren't any candidate keys yet, we'll do some things different
- if (candidateKeys.isEmpty()) {
- rowAtOrBeforeCandidate(startKey, map, row, candidateKeys, deletes, now);
- } else {
- rowAtOrBeforeWithCandidates(startKey, map, row, candidateKeys, deletes,
- now);
- }
- }
- }
-
- /* Find a candidate for row that is at or before passed row in passed
- * mapfile.
- * @param startKey First key in the mapfile.
- * @param map
- * @param row
- * @param candidateKeys
- * @param now
- * @throws IOException
- */
- private void rowAtOrBeforeCandidate(final HStoreKey startKey,
- final MapFile.Reader map, final byte[] row,
- final SortedMap<HStoreKey, Long> candidateKeys,
- final Set<HStoreKey> deletes, final long now)
- throws IOException {
- // if the row we're looking for is past the end of this mapfile, set the
- // search key to be the last key. If it's a deleted key, then we'll back
- // up to the row before and return that.
- HStoreKey finalKey = getFinalKey(map);
- HStoreKey searchKey = null;
- if (HStoreKey.compareTwoRowKeys(info,finalKey.getRow(), row) < 0) {
- searchKey = finalKey;
- } else {
- searchKey = new HStoreKey(row, this.info);
- if (searchKey.compareTo(startKey) < 0) {
- searchKey = startKey;
- }
- }
- rowAtOrBeforeCandidate(map, searchKey, candidateKeys, deletes, now);
- }
-
- /*
- * @param ttl
- * @param hsk
- * @param now
- * @param deletes
- * @return True if key has not expired and is not in passed set of deletes.
- */
- static boolean notExpiredAndNotInDeletes(final long ttl,
- final HStoreKey hsk, final long now, final Set<HStoreKey> deletes) {
- return !isExpired(hsk, ttl, now) &&
- (deletes == null || !deletes.contains(hsk));
- }
-
- static boolean isExpired(final HStoreKey hsk, final long ttl,
- final long now) {
- return ttl != HConstants.FOREVER && now > hsk.getTimestamp() + ttl;
- }
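isExpired treats the special FOREVER ttl as "never expires" and otherwise expires a key once now is past timestamp + ttl. A standalone sketch of the same rule; the FOREVER constant here is a stand-in, since the actual value of HConstants.FOREVER is not shown in this patch:

public class TtlCheck {
  // Stand-in for HConstants.FOREVER (the real constant's value is not shown here).
  static final long FOREVER = Long.MAX_VALUE;

  // Expired once 'now' has passed the cell timestamp by more than ttl millis.
  static boolean isExpired(long timestamp, long ttl, long now) {
    return ttl != FOREVER && now > timestamp + ttl;
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    System.out.println(isExpired(now - 5_000, 1_000, now));   // true: older than the 1s ttl
    System.out.println(isExpired(now - 5_000, 10_000, now));  // false: still within ttl
    System.out.println(isExpired(now - 5_000, FOREVER, now)); // false: never expires
  }
}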
-
- /* Find a candidate for row that is at or before passed key, sk, in mapfile.
- * @param map
- * @param sk Key to go search the mapfile with.
- * @param candidateKeys
- * @param now
- * @throws IOException
- * @see #rowAtOrBeforeCandidate(HStoreKey, org.apache.hadoop.io.MapFile.Reader, byte[], SortedMap, long)
- */
- private void rowAtOrBeforeCandidate(final MapFile.Reader map,
- final HStoreKey sk, final SortedMap<HStoreKey, Long> candidateKeys,
- final Set<HStoreKey> deletes, final long now)
- throws IOException {
- HStoreKey searchKey = sk;
- if (searchKey.getHRegionInfo() == null) {
- searchKey.setHRegionInfo(this.info);
- }
- HStoreKey readkey = null;
- ImmutableBytesWritable readval = new ImmutableBytesWritable();
- HStoreKey knownNoGoodKey = null;
- for (boolean foundCandidate = false; !foundCandidate;) {
- // Seek to the exact row, or the one that would be immediately before it
- readkey = (HStoreKey)map.getClosest(searchKey, readval, true);
- if (readkey == null) {
- // If null, we are at the start or end of the file.
- break;
- }
- HStoreKey deletedOrExpiredRow = null;
- do {
- // Set this region into the readkey.
- readkey.setHRegionInfo(this.info);
- // If we have an exact match on row, and it's not a delete, save this
- // as a candidate key
- if (HStoreKey.equalsTwoRowKeys(this.info, readkey.getRow(),
- searchKey.getRow())) {
- if (!HLogEdit.isDeleted(readval.get())) {
- if (handleNonDelete(readkey, now, deletes, candidateKeys)) {
- foundCandidate = true;
- // NOTE! Continue.
- continue;
- }
- }
- HStoreKey copy = addCopyToDeletes(readkey, deletes);
- if (deletedOrExpiredRow == null) {
- deletedOrExpiredRow = copy;
- }
- } else if (HStoreKey.compareTwoRowKeys(this.info, readkey.getRow(),
- searchKey.getRow()) > 0) {
- // if the row key we just read is beyond the key we're searching for,
- // then we're done.
- break;
- } else {
- // So, the row key doesn't match, but we haven't gone past the row
- // we're seeking yet, so this row is a candidate for closest
- // (assuming that it isn't a delete).
- if (!HLogEdit.isDeleted(readval.get())) {
- if (handleNonDelete(readkey, now, deletes, candidateKeys)) {
- foundCandidate = true;
- // NOTE: Continue
- continue;
- }
- }
- HStoreKey copy = addCopyToDeletes(readkey, deletes);
- if (deletedOrExpiredRow == null) {
- deletedOrExpiredRow = copy;
- }
- }
- } while(map.next(readkey, readval) && (knownNoGoodKey == null ||
- readkey.compareTo(knownNoGoodKey) < 0));
-
- // If we get here and have no candidates but we did find a deleted or
- // expired candidate, we need to look at the key before that
- if (!foundCandidate && deletedOrExpiredRow != null) {
- knownNoGoodKey = deletedOrExpiredRow;
- searchKey = new HStoreKey.BeforeThisStoreKey(deletedOrExpiredRow);
- } else {
- // No candidates and no deleted or expired candidates. Give up.
- break;
- }
- }
-
- // Arriving here just means that we consumed the whole rest of the map
- // without going "past" the key we're searching for. we can just fall
- // through here.
- }
-
- /*
- * @param key Key to copy and add to deletes
- * @param deletes
- * @return Instance of the copy added to deletes
- */
- private HStoreKey addCopyToDeletes(final HStoreKey key,
- final Set<HStoreKey> deletes) {
- HStoreKey copy = new HStoreKey(key);
- deletes.add(copy);
- return copy;
- }
-
- private void rowAtOrBeforeWithCandidates(final HStoreKey startKey,
- final MapFile.Reader map, final byte[] row,
- final SortedMap<HStoreKey, Long> candidateKeys,
- final Set<HStoreKey> deletes, final long now)
- throws IOException {
- HStoreKey readkey = null;
- ImmutableBytesWritable readval = new ImmutableBytesWritable();
-
- // if there are already candidate keys, we need to start our search
- // at the earliest possible key so that we can discover any possible
- // deletes for keys between the start and the search key. Back up to start
- // of the row in case there are deletes for this candidate in this mapfile
- // BUT do not backup before the first key in the mapfile else getClosest
- // will return null
- HStoreKey searchKey = new HStoreKey(candidateKeys.firstKey().getRow(), this.info);
- if (searchKey.compareTo(startKey) < 0) {
- searchKey = startKey;
- }
-
- // Seek to the exact row, or the one that would be immediately before it
- readkey = (HStoreKey)map.getClosest(searchKey, readval, true);
- if (readkey == null) {
- // If null, we are at the start or end of the file.
- // Didn't find anything that would match, so return
- return;
- }
-
- do {
- // if we have an exact match on row, and it's not a delete, save this
- // as a candidate key
- if (Bytes.equals(readkey.getRow(), row)) {
- handleKey(readkey, readval.get(), now, deletes, candidateKeys);
- } else if (HStoreKey.compareTwoRowKeys(info,
- readkey.getRow(), row) > 0 ) {
- // if the row key we just read is beyond the key we're searching for,
- // then we're done.
- break;
- } else {
- // So, the row key doesn't match, but we haven't gone past the row
- // we're seeking yet, so this row is a candidate for closest
- // (assuming that it isn't a delete).
- handleKey(readkey, readval.get(), now, deletes, candidateKeys);
- }
- } while(map.next(readkey, readval));
- }
-
- /*
- * @param readkey
- * @param now
- * @param deletes
- * @param candidateKeys
- */
- private void handleKey(final HStoreKey readkey, final byte [] value,
- final long now, final Set<HStoreKey> deletes,
- final SortedMap<HStoreKey, Long> candidateKeys) {
- if (!HLogEdit.isDeleted(value)) {
- handleNonDelete(readkey, now, deletes, candidateKeys);
- } else {
- // Pass copy because readkey will change next time next is called.
- handleDeleted(new HStoreKey(readkey), candidateKeys, deletes);
- }
- }
-
- /*
- * @param readkey
- * @param now
- * @param deletes
- * @param candidateKeys
- * @return True if we added a candidate.
- */
- private boolean handleNonDelete(final HStoreKey readkey, final long now,
- final Set<HStoreKey> deletes, final Map<HStoreKey, Long> candidateKeys) {
- if (notExpiredAndNotInDeletes(this.ttl, readkey, now, deletes)) {
- candidateKeys.put(stripTimestamp(readkey),
- Long.valueOf(readkey.getTimestamp()));
- return true;
- }
- return false;
- }
-
- /* Handle keys whose values hold deletes.
- * Add the key to the set of deletes and then, if the candidate keys contain
- * any that might match by timestamp, check for a match and remove it if it is
- * too young to survive the delete.
- * @param k Be careful; if key was gotten from a Mapfile, pass in a copy.
- * Values gotten by 'nexting' out of Mapfiles will change in each invocation.
- * @param candidateKeys
- * @param deletes
- */
- static void handleDeleted(final HStoreKey k,
- final SortedMap<HStoreKey, Long> candidateKeys,
- final Set<HStoreKey> deletes) {
- deletes.add(k);
- HStoreKey strippedKey = stripTimestamp(k);
- if (candidateKeys.containsKey(strippedKey)) {
- long bestCandidateTs =
- candidateKeys.get(strippedKey).longValue();
- if (bestCandidateTs <= k.getTimestamp()) {
- candidateKeys.remove(strippedKey);
- }
- }
- }
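handleDeleted records the delete and evicts a candidate for the same stripped key whenever the candidate's timestamp is not newer than the delete's. A tiny sketch of that eviction rule using String keys and plain maps instead of HStoreKeys:

import java.util.*;

public class DeleteMask {
  // candidateKeys: row/column (timestamp stripped) -> best candidate timestamp so far.
  static void handleDeleted(String strippedKey, long deleteTs,
      Map<String, Long> candidateKeys, Set<String> deletes) {
    deletes.add(strippedKey + "@" + deleteTs);
    Long best = candidateKeys.get(strippedKey);
    // A delete at an equal or newer timestamp knocks out the candidate.
    if (best != null && best <= deleteTs) {
      candidateKeys.remove(strippedKey);
    }
  }

  public static void main(String[] args) {
    Map<String, Long> candidates = new HashMap<>();
    Set<String> deletes = new HashSet<>();
    candidates.put("r1/c1", 100L);
    handleDeleted("r1/c1", 150L, candidates, deletes);   // delete is newer -> candidate removed
    System.out.println(candidates.containsKey("r1/c1")); // false
    candidates.put("r1/c1", 200L);
    handleDeleted("r1/c1", 150L, candidates, deletes);   // candidate newer than delete -> kept
    System.out.println(candidates.containsKey("r1/c1")); // true
  }
}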
-
- /*
- * @param mf MapFile to dig in.
- * @return Final key from passed mf
- * @throws IOException
- */
- private HStoreKey getFinalKey(final MapFile.Reader mf) throws IOException {
- HStoreKey finalKey = new HStoreKey();
- mf.finalKey(finalKey);
- finalKey.setHRegionInfo(this.info);
- return finalKey;
- }
-
- static HStoreKey stripTimestamp(HStoreKey key) {
- return new HStoreKey(key.getRow(), key.getColumn(), key.getHRegionInfo());
- }
-
- /*
- * Test that the target matches the origin cell address. If the
- * origin has an empty column, then any column is assumed to match and we
- * match only on row and timestamp. Otherwise, it compares the
- * keys with HStoreKey.matchesRowCol().
- * @param origin The key we're testing against
- * @param target The key we're testing
- */
- private boolean cellMatches(HStoreKey origin, HStoreKey target){
- // if the origin's column is empty, then we're matching any column
- if (Bytes.equals(origin.getColumn(), HConstants.EMPTY_BYTE_ARRAY)) {
- // if the row matches, then...
- if (HStoreKey.equalsTwoRowKeys(info, target.getRow(), origin.getRow())) {
- // check the timestamp
- return target.getTimestamp() <= origin.getTimestamp();
- }
- return false;
- }
- // otherwise, we want to match on row and column
- return target.matchesRowCol(origin);
- }
-
- /*
- * Test that the target matches the origin. If the origin
- * has an empty column, then it just tests row equivalence. Otherwise, it uses
- * HStoreKey.matchesRowCol().
- * @param origin Key we're testing against
- * @param target Key we're testing
- */
- private boolean rowMatches(HStoreKey origin, HStoreKey target){
- // if the origin's column is empty, then we're matching any column
- if (Bytes.equals(origin.getColumn(), HConstants.EMPTY_BYTE_ARRAY)) {
- // if the row matches, then...
- return HStoreKey.equalsTwoRowKeys(info, target.getRow(), origin.getRow());
- }
- // otherwise, we want to match on row and column
- return target.matchesRowCol(origin);
- }
-
- /**
- * Determines if HStore can be split
- * @param force Whether to force a split or not.
- * @return a StoreSize if store can be split, null otherwise
- */
- StoreSize checkSplit(final boolean force) {
- if (this.storefiles.size() <= 0) {
- return null;
- }
- if (!force && (storeSize < this.desiredMaxFileSize)) {
- return null;
- }
- this.lock.readLock().lock();
- try {
- // Not splitable if we find a reference store file present in the store.
- boolean splitable = true;
- long maxSize = 0L;
- Long mapIndex = Long.valueOf(0L);
- // Iterate through all the MapFiles
- synchronized (storefiles) {
- for (Map.Entry<Long, HStoreFile> e: storefiles.entrySet()) {
- HStoreFile curHSF = e.getValue();
- if (splitable) {
- splitable = !curHSF.isReference();
- if (!splitable) {
- // RETURN IN MIDDLE OF FUNCTION!!! If not splitable, just return.
- if (LOG.isDebugEnabled()) {
- LOG.debug(curHSF + " is not splittable");
- }
- return null;
- }
- }
- long size = curHSF.length();
- if (size > maxSize) {
- // This is the largest one so far
- maxSize = size;
- mapIndex = e.getKey();
- }
- }
- }
-
- // Cast to HBaseReader.
- HBaseMapFile.HBaseReader r =
- (HBaseMapFile.HBaseReader)this.readers.get(mapIndex);
- // Get first, last, and mid keys.
- r.reset();
- HStoreKey firstKey = new HStoreKey();
- HStoreKey lastKey = new HStoreKey();
- r.next(firstKey, new ImmutableBytesWritable());
- r.finalKey(lastKey);
- HStoreKey mk = (HStoreKey)r.midKey();
- if (mk != null) {
- // if the midkey is the same as the first and last keys, then we cannot
- // (ever) split this region.
- if (HStoreKey.equalsTwoRowKeys(info, mk.getRow(), firstKey.getRow()) &&
- HStoreKey.equalsTwoRowKeys(info, mk.getRow(), lastKey.getRow())) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("cannot split because midkey is the same as first or last row");
- }
- return null;
- }
- return new StoreSize(maxSize, mk.getRow());
- }
- } catch(IOException e) {
- LOG.warn("Failed getting store size for " + this.storeNameStr, e);
- } finally {
- this.lock.readLock().unlock();
- }
- return null;
- }
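checkSplit picks the largest store file and proposes its mid key as the split point, refusing when the mid key equals both the first and last keys. The sketch below illustrates only that decision rule; list length stands in for file length and the middle element for MapFile.midKey():

import java.util.*;

public class SplitPointSketch {
  // Pick the mid row of the largest "file" (here just a sorted list of row keys),
  // refusing to split when the mid row equals both ends of that file.
  static String checkSplit(List<List<String>> files) {
    List<String> largest = Collections.emptyList();
    for (List<String> f : files) {
      if (f.size() > largest.size()) largest = f;
    }
    if (largest.isEmpty()) return null;
    String first = largest.get(0);
    String last = largest.get(largest.size() - 1);
    String mid = largest.get(largest.size() / 2);
    // Same first, mid and last row: the region can never be split on this file.
    return (mid.equals(first) && mid.equals(last)) ? null : mid;
  }

  public static void main(String[] args) {
    System.out.println(checkSplit(List.of(List.of("a", "b", "c", "d"), List.of("a")))); // c
    System.out.println(checkSplit(List.of(List.of("x", "x", "x"))));                    // null
  }
}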
-
- /** @return aggregate size of HStore */
- public long getSize() {
- return storeSize;
- }
-
- //////////////////////////////////////////////////////////////////////////////
- // File administration
- //////////////////////////////////////////////////////////////////////////////
-
- /**
- * Return a scanner for both the memcache and the HStore files
- */
- protected InternalScanner getScanner(long timestamp, byte [][] targetCols,
- byte [] firstRow, RowFilterInterface filter)
- throws IOException {
- lock.readLock().lock();
- try {
- return new HStoreScanner(this, targetCols, firstRow, timestamp, filter);
- } finally {
- lock.readLock().unlock();
- }
- }
-
- @Override
- public String toString() {
- return this.storeNameStr;
- }
-
- /**
- * @param p Path to check.
- * @return True if the path has format of a HStoreFile reference.
- */
- public static boolean isReference(final Path p) {
- return isReference(p, REF_NAME_PARSER.matcher(p.getName()));
- }
-
- private static boolean isReference(final Path p, final Matcher m) {
- if (m == null || !m.matches()) {
- LOG.warn("Failed match of store file name " + p.toString());
- throw new RuntimeException("Failed match of store file name " +
- p.toString());
- }
- return m.groupCount() > 1 && m.group(2) != null;
- }
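isReference decides by file name: a plain store file is a bare numeric id, while a reference carries a '.' plus the referenced region's encoded name (see createHStoreFilename later in this patch). The REF_NAME_PARSER pattern itself is not shown in this hunk, so the regex below is an assumed approximation:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ReferenceNameCheck {
  // Assumed shape of REF_NAME_PARSER: a numeric file id, optionally followed by
  // "." and an encoded region name. The real pattern may differ.
  static final Pattern REF_NAME_PARSER = Pattern.compile("^(\\d+)(?:\\.(.+))?$");

  static boolean isReference(String fileName) {
    Matcher m = REF_NAME_PARSER.matcher(fileName);
    if (!m.matches()) {
      throw new RuntimeException("Failed match of store file name " + fileName);
    }
    // group(2) is only present when the name carries a referenced-region suffix.
    return m.groupCount() > 1 && m.group(2) != null;
  }

  public static void main(String[] args) {
    System.out.println(isReference("1278437856009925445"));            // false: plain store file
    System.out.println(isReference("1278437856009925445.1234567890")); // true: reference
  }
}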
-
- /**
- * @return Current list of store files.
- */
- SortedMap<Long, HStoreFile> getStorefiles() {
- synchronized (this.storefiles) {
- SortedMap<Long, HStoreFile> copy =
- new TreeMap<Long, HStoreFile>(this.storefiles);
- return copy;
- }
- }
-
- /**
- * @return Count of store files
- */
- int getStorefilesCount() {
- return this.storefiles.size();
- }
-
- /**
- * @return The size of the store file indexes, in bytes.
- * @throws IOException if there was a problem getting file sizes from the
- * filesystem
- */
- long getStorefilesIndexSize() throws IOException {
- long size = 0;
- for (HStoreFile s: storefiles.values())
- size += s.indexLength();
- return size;
- }
-
- /*
- * Datastructure that holds size and key.
- */
- class StoreSize {
- private final long size;
- private final byte[] key;
- StoreSize(long size, byte[] key) {
- this.size = size;
- this.key = new byte[key.length];
- System.arraycopy(key, 0, this.key, 0, key.length);
- }
- /* @return the size */
- long getSize() {
- return size;
- }
- /* @return the key */
- byte[] getKey() {
- return key;
- }
- }
-
- HRegionInfo getHRegionInfo() {
- return this.info;
- }
-}
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
deleted file mode 100644
index 2162276..0000000
--- a/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
+++ /dev/null
@@ -1,558 +0,0 @@
-/**
- * Copyright 2007 The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.regionserver;
-
-import java.io.DataInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Random;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.io.BloomFilterMapFile;
-import org.apache.hadoop.hbase.io.HalfMapFileReader;
-import org.apache.hadoop.hbase.io.Reference;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.io.MapFile;
-import org.apache.hadoop.hbase.io.SequenceFile;
-
-/**
- * A HStore data file. HStores usually have one or more of these files. They
- * are produced by flushing the memcache to disk.
- *
- * Each HStore maintains a bunch of different data files. The filename is a
- * mix of the parent dir, the region name, the column name, and a file
- * identifier. The name may also be a reference to a store file located
- * elsewhere. This class handles all that path-building stuff for you.
- *
- * An HStoreFile usually tracks 4 things: its parent dir, the region
- * identifier, the column family, and the file identifier. If you know those
- * four things, you know how to obtain the right HStoreFile. HStoreFiles may
- * also reference store files in another region serving either from
- * the top-half of the remote file or from the bottom-half. Such references
- * are made when splitting regions, to keep splits fast.
- *
- * Plain HStoreFiles are named for a randomly generated id as in:
- * 1278437856009925445. A file by this name is made in both the
- * mapfiles and info subdirectories of a
- * HStore columnfamily directory: e.g. if the column family is 'anchor:', then
- * under the region directory there is a subdirectory named 'anchor' within
- * which is a 'mapfiles' and 'info' subdirectory. In each will be found a
- * file named something like 1278437856009925445, one to hold the
- * data in 'mapfiles' and one under 'info' that holds the sequence id for this
- * store file.
- *
- * References to store files located over in some other region look like
- * this:
- * 1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184:
- * i.e. an id followed by the name of the referenced region. The data
- * ('mapfiles') of HStoreFile references are empty. The accompanying
- * info file contains the
- * midkey, the id of the remote store we're referencing and whether we're
- * to serve the top or bottom region of the remote store file. Note, a region
- * is not splittable if it has instances of store file references (References
- * are cleaned up by compactions).
- *
- * When merging or splitting HRegions, we might want to modify one of the
- * params for an HStoreFile (effectively moving it elsewhere).
- */
-public class HStoreFile implements HConstants {
- static final Log LOG = LogFactory.getLog(HStoreFile.class.getName());
- static final byte INFO_SEQ_NUM = 0;
- static final byte MAJOR_COMPACTION = INFO_SEQ_NUM + 1;
- static final String HSTORE_DATFILE_DIR = "mapfiles";
- static final String HSTORE_INFO_DIR = "info";
- static final String HSTORE_FILTER_DIR = "filter";
-
- private final static Random rand = new Random();
-
- private final Path basedir;
- private final int encodedRegionName;
- private final byte [] colFamily;
- private final long fileId;
- private final HBaseConfiguration conf;
- private final FileSystem fs;
- private final Reference reference;
- private final HRegionInfo hri;
- /* If true, this file was product of a major compaction.
- */
- private boolean majorCompaction = false;
- private long indexLength;
-
- /**
- * Constructor that fully initializes the object
- * @param conf Configuration object
- * @param basedir qualified path that is parent of region directory
- * @param colFamily name of the column family
- * @param fileId file identifier
- * @param ref Reference to another HStoreFile.
- * @param hri The region info for this file (HACK HBASE-868). TODO: Fix.
- * @throws IOException
- */
- HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
- final HRegionInfo hri, byte [] colFamily, long fileId,
- final Reference ref)
- throws IOException {
- this(conf, fs, basedir, hri, colFamily, fileId, ref, false);
- }
-
- /**
- * Constructor that fully initializes the object
- * @param conf Configuration object
- * @param basedir qualified path that is parent of region directory
- * @param colFamily name of the column family
- * @param fileId file identifier
- * @param ref Reference to another HStoreFile.
- * @param hri The region info for this file (HACK HBASE-868). TODO: Fix.
- * @param mc True if this file was the result of a major compaction.
- * @throws IOException
- */
- HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
- final HRegionInfo hri, byte [] colFamily, long fileId,
- final Reference ref, final boolean mc)
- throws IOException {
- this.conf = conf;
- this.fs = fs;
- this.basedir = basedir;
- this.encodedRegionName = hri.getEncodedName();
- this.colFamily = colFamily;
- this.hri = hri;
-
- long id = fileId;
- if (id == -1) {
- Path mapdir = HStoreFile.getMapDir(basedir, encodedRegionName, colFamily);
- Path testpath = null;
- do {
- id = Math.abs(rand.nextLong());
- testpath = new Path(mapdir, createHStoreFilename(id, -1));
- } while(fs.exists(testpath));
- }
- this.fileId = id;
-
- // If a reference, construction does not write the pointer files. That's
- // done by invocations of writeReferenceFiles(hsf, fs). Happens at split.
- this.reference = ref;
- this.majorCompaction = mc;
- }
-
- /** @return True if this is a reference to a store file in another region */
- boolean isReference() {
- return reference != null;
- }
-
- Reference getReference() {
- return reference;
- }
-
- int getEncodedRegionName() {
- return this.encodedRegionName;
- }
-
- /** @return the column family */
- byte [] getColFamily() {
- return colFamily;
- }
-
- /** @return the file identifier */
- long getFileId() {
- return fileId;
- }
-
- // Build full filenames from those components
-
- /** @return path for MapFile */
- Path getMapFilePath() {
- if (isReference()) {
- return getMapFilePath(encodedRegionName, fileId,
- reference.getEncodedRegionName());
- }
- return getMapFilePath(this.encodedRegionName, fileId);
- }
-
- private Path getMapFilePath(final Reference r) {
- if (r == null) {
- return getMapFilePath();
- }
- return getMapFilePath(r.getEncodedRegionName(), r.getFileId());
- }
-
- private Path getMapFilePath(final int encodedName, final long fid) {
- return getMapFilePath(encodedName, fid, HRegionInfo.NO_HASH);
- }
-
- private Path getMapFilePath(final int encodedName, final long fid,
- final int ern) {
- return new Path(HStoreFile.getMapDir(basedir, encodedName, colFamily),
- createHStoreFilename(fid, ern));
- }
-
- /** @return path for info file */
- Path getInfoFilePath() {
- if (isReference()) {
- return getInfoFilePath(encodedRegionName, fileId,
- reference.getEncodedRegionName());
-
- }
- return getInfoFilePath(encodedRegionName, fileId);
- }
-
- private Path getInfoFilePath(final int encodedName, final long fid) {
- return getInfoFilePath(encodedName, fid, HRegionInfo.NO_HASH);
- }
-
- private Path getInfoFilePath(final int encodedName, final long fid,
- final int ern) {
- return new Path(HStoreFile.getInfoDir(basedir, encodedName, colFamily),
- createHStoreFilename(fid, ern));
- }
-
- // File handling
-
- /*
- * Split by making two new store files that reference top and bottom regions
- * of the original store file.
- * @param dstA the file which will contain keys from the start of the source
- * @param dstB the file which will contain keys from the midkey to the end of
- * the source
- * @param fs file system
- * @throws IOException
- */
- void splitStoreFile(final HStoreFile dstA, final HStoreFile dstB,
- final FileSystem fs)
- throws IOException {
- dstA.writeReferenceFiles(fs);
- dstB.writeReferenceFiles(fs);
- }
-
- void writeReferenceFiles(final FileSystem fs)
- throws IOException {
- createOrFail(fs, getMapFilePath());
- writeSplitInfo(fs);
- }
-
- /*
- * If a reference, create and write out to the info file the remote store
- * file id, the midkey, and whether we're serving the top or bottom half of
- * the referent.
- * @param p Path to info file.
- * @param hsf
- * @param fs
- * @throws IOException
- */
- private void writeSplitInfo(final FileSystem fs) throws IOException {
- Path p = getInfoFilePath();
- if (fs.exists(p)) {
- throw new IOException("File already exists " + p.toString());
- }
- FSDataOutputStream out = fs.create(p);
- try {
- reference.write(out);
- } finally {
- out.close();
- }
- }
-
- /**
- * @see #writeSplitInfo(FileSystem)
- */
- static Reference readSplitInfo(final Path p, final FileSystem fs)
- throws IOException {
- FSDataInputStream in = fs.open(p);
- try {
- Reference r = new Reference();
- r.readFields(in);
- return r;
- } finally {
- in.close();
- }
- }
-
- private void createOrFail(final FileSystem fs, final Path p)
- throws IOException {
- if (fs.exists(p)) {
- throw new IOException("File already exists " + p.toString());
- }
- if (!fs.createNewFile(p)) {
- throw new IOException("Failed create of " + p);
- }
- }
-
- /**
- * Reads in an info file
- *
- * @param filesystem file system
- * @return The sequence id contained in the info file
- * @throws IOException
- */
- long loadInfo(final FileSystem filesystem) throws IOException {
- Path p = null;
- if (isReference()) {
- p = getInfoFilePath(reference.getEncodedRegionName(),
- this.reference.getFileId());
- } else {
- p = getInfoFilePath();
- }
- long length = filesystem.getFileStatus(p).getLen();
- boolean hasMoreThanSeqNum = length > (Byte.SIZE + Bytes.SIZEOF_LONG);
- DataInputStream in = new DataInputStream(filesystem.open(p));
- try {
- byte flag = in.readByte();
- if (flag == INFO_SEQ_NUM) {
- if (hasMoreThanSeqNum) {
- flag = in.readByte();
- if (flag == MAJOR_COMPACTION) {
- this.majorCompaction = in.readBoolean();
- }
- }
- return in.readLong();
- }
- throw new IOException("Cannot process log file: " + p);
- } finally {
- in.close();
- }
- }
-
- /**
- * Writes the file-identifier to disk
- *
- * @param filesystem file system
- * @param infonum file id
- * @throws IOException
- */
- void writeInfo(final FileSystem filesystem, final long infonum)
- throws IOException {
- writeInfo(filesystem, infonum, false);
- }
-
- /**
- * Writes the file-identifier to disk
- *
- * @param filesystem file system
- * @param infonum file id
- * @param mc True if this file is product of a major compaction
- * @throws IOException
- */
- void writeInfo(final FileSystem filesystem, final long infonum,
- final boolean mc)
- throws IOException {
- Path p = getInfoFilePath();
- FSDataOutputStream out = filesystem.create(p);
- try {
- out.writeByte(INFO_SEQ_NUM);
- out.writeLong(infonum);
- if (mc) {
- // Set whether major compaction flag on this file.
- this.majorCompaction = mc;
- out.writeByte(MAJOR_COMPACTION);
- out.writeBoolean(mc);
- }
- } finally {
- out.close();
- }
- }
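writeInfo above lays the info file out as a flag byte, the sequence id as a long, and, for major compactions, a second flag byte followed by a boolean. A round-trip sketch of that layout using in-memory streams in place of the FileSystem streams; the 42 sequence id is made up:

import java.io.*;

public class InfoFileSketch {
  // Byte flags copied from the constants declared earlier in this file.
  static final byte INFO_SEQ_NUM = 0;
  static final byte MAJOR_COMPACTION = INFO_SEQ_NUM + 1;

  public static void main(String[] args) throws IOException {
    // Write: flag byte, sequence id, then the optional major-compaction marker,
    // mirroring the order used by writeInfo above.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(bytes)) {
      out.writeByte(INFO_SEQ_NUM);
      out.writeLong(42L);               // hypothetical sequence id
      out.writeByte(MAJOR_COMPACTION);  // only written when mc == true
      out.writeBoolean(true);
    }

    // Read it back in the same order.
    try (DataInputStream in = new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray()))) {
      byte flag = in.readByte();        // INFO_SEQ_NUM
      long seqId = in.readLong();       // 42
      boolean major = in.available() > 0
          && in.readByte() == MAJOR_COMPACTION && in.readBoolean();
      System.out.println(flag + " " + seqId + " " + major); // 0 42 true
    }
  }
}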
-
- /**
- * Delete store map files.
- * @throws IOException
- */
- public void delete() throws IOException {
- fs.delete(getMapFilePath(), true);
- fs.delete(getInfoFilePath(), true);
- }
-
- /**
- * Renames the mapfiles and info directories under the passed
- * hsf directory.
- * @param fs
- * @param hsf
- * @return True if succeeded.
- * @throws IOException
- */
- public boolean rename(final FileSystem fs, final HStoreFile hsf)
- throws IOException {
- Path src = getMapFilePath();
- if (!fs.exists(src)) {
- throw new FileNotFoundException(src.toString());
- }
- boolean success = fs.rename(src, hsf.getMapFilePath());
- if (!success) {
- LOG.warn("Failed rename of " + src + " to " + hsf.getMapFilePath());
- } else {
- src = getInfoFilePath();
- if (!fs.exists(src)) {
- throw new FileNotFoundException(src.toString());
- }
- success = fs.rename(src, hsf.getInfoFilePath());
- if (!success) {
- LOG.warn("Failed rename of " + src + " to " + hsf.getInfoFilePath());
- }
- }
- return success;
- }
-
- /**
- * Get reader for the store file map file.
- * Client is responsible for closing file when done.
- * @param fs
- * @param bloomFilter If true, a bloom filter exists
- * @param blockCacheEnabled If true, MapFile blocks should be cached.
- * @return BloomFilterMapFile.Reader
- * @throws IOException
- */
- public synchronized BloomFilterMapFile.Reader getReader(final FileSystem fs,
- final boolean bloomFilter, final boolean blockCacheEnabled)
- throws IOException {
- if (isReference()) {
- return new HalfMapFileReader(fs,
- getMapFilePath(reference).toString(), conf,
- reference.getFileRegion(), reference.getMidkey(), bloomFilter,
- blockCacheEnabled, this.hri);
- }
- return new BloomFilterMapFile.Reader(fs, getMapFilePath().toString(),
- conf, bloomFilter, blockCacheEnabled, this.hri);
- }
-
- /**
- * Get a store file writer.
- * Client is responsible for closing file when done.
- * @param fs
- * @param compression Pass SequenceFile.CompressionType.NONE
- * for none.
- * @param bloomFilter If true, create a bloom filter
- * @param nrows number of rows expected. Required if bloomFilter is true.
- * @return MapFile.Writer
- * @throws IOException
- */
- public MapFile.Writer getWriter(final FileSystem fs,
- final SequenceFile.CompressionType compression,
- final boolean bloomFilter, int nrows)
- throws IOException {
- if (isReference()) {
- throw new IOException("Illegal Access: Cannot get a writer on a" +
- "HStoreFile reference");
- }
- return new BloomFilterMapFile.Writer(conf, fs,
- getMapFilePath().toString(), compression, bloomFilter, nrows, this.hri);
- }
-
- /**
- * @return Length of the store map file. If a reference, size is an
- * approximation.
- * @throws IOException
- */
- public long length() throws IOException {
- Path p = new Path(getMapFilePath(reference), MapFile.DATA_FILE_NAME);
- long l = p.getFileSystem(conf).getFileStatus(p).getLen();
- return (isReference())? l / 2: l;
- }
-
- /**
- * @return Length of the store map file index.
- * @throws IOException
- */
- public synchronized long indexLength() throws IOException {
- if (indexLength == 0) {
- Path p = new Path(getMapFilePath(reference), MapFile.INDEX_FILE_NAME);
- indexLength = p.getFileSystem(conf).getFileStatus(p).getLen();
- }
- return indexLength;
- }
-
- @Override
- public String toString() {
- return encodedRegionName + "/" + Bytes.toString(colFamily) + "/" + fileId +
- (isReference()? "-" + reference.toString(): "");
- }
-
- /**
- * @return True if this file was made by a major compaction.
- */
- public boolean isMajorCompaction() {
- return this.majorCompaction;
- }
-
- private static String createHStoreFilename(final long fid,
- final int encodedRegionName) {
- return Long.toString(fid) +
- ((encodedRegionName != HRegionInfo.NO_HASH)?
- "." + encodedRegionName : "");
- }
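createHStoreFilename yields either the bare file id or id '.' encodedRegionName for references, matching the naming described in the class comment. A quick standalone demonstration; NO_HASH is stood in by -1, the value passed at the call sites in this file:

public class StoreFileNames {
  // Stand-in for HRegionInfo.NO_HASH; the call sites in this file pass -1 for "no reference".
  static final int NO_HASH = -1;

  static String createHStoreFilename(long fid, int encodedRegionName) {
    return Long.toString(fid)
        + ((encodedRegionName != NO_HASH) ? "." + encodedRegionName : "");
  }

  public static void main(String[] args) {
    System.out.println(createHStoreFilename(1278437856009925445L, NO_HASH));   // plain store file
    System.out.println(createHStoreFilename(1278437856009925445L, 959247014)); // reference file
  }
}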
-
- /**
- * @param dir Base directory
- * @param encodedRegionName Encoding of region name.
- * @param f Column family.
- * @return path for map file directory
- */
- public static Path getMapDir(Path dir, int encodedRegionName,
- final byte [] f) {
- return getFamilySubDir(dir, encodedRegionName, f, HSTORE_DATFILE_DIR);
- }
-
- /**
- * @param dir Base directory
- * @param encodedRegionName Encoding of region name.
- * @param f Column family.
- * @return the info directory path
- */
- public static Path getInfoDir(Path dir, int encodedRegionName, byte [] f) {
- return getFamilySubDir(dir, encodedRegionName, f, HSTORE_INFO_DIR);
- }
-
- /**
- * @param dir Base directory
- * @param encodedRegionName Encoding of region name.
- * @param f Column family.
- * @return the bloom filter directory path
- */
- @Deprecated
- public static Path getFilterDir(Path dir, int encodedRegionName,
- final byte [] f) {
- return getFamilySubDir(dir, encodedRegionName, f, HSTORE_FILTER_DIR);
- }
-
- /*
- * @param base Base directory
- * @param encodedRegionName Encoding of region name.
- * @param f Column family.
- * @param subdir Subdirectory to create under column family/store directory.
- * @return
- */
- private static Path getFamilySubDir(final Path base,
- final int encodedRegionName, final byte [] f, final String subdir) {
- return new Path(base, new Path(Integer.toString(encodedRegionName),
- new Path(Bytes.toString(f), subdir)));
- }
-}
\ No newline at end of file
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java b/src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java
deleted file mode 100644
index 33f5a3b..0000000
--- a/src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java
+++ /dev/null
@@ -1,344 +0,0 @@
-/**
- * Copyright 2008 The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.regionserver;
-
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HStoreKey;
-import org.apache.hadoop.hbase.filter.RowFilterInterface;
-import org.apache.hadoop.hbase.io.Cell;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.io.MapFile;
-
-/**
- * Scanner scans both the memcache and the HStore
- */
-class HStoreScanner implements InternalScanner, ChangedReadersObserver {
- static final Log LOG = LogFactory.getLog(HStoreScanner.class);
-
- private InternalScanner[] scanners;
- private TreeMap<byte [], Cell>[] resultSets;
- private HStoreKey[] keys;
- private boolean wildcardMatch = false;
- private boolean multipleMatchers = false;
- private RowFilterInterface dataFilter;
- private HStore store;
- private final long timestamp;
- private final byte [][] targetCols;
-
- // Indices for memcache scanner and hstorefile scanner.
- private static final int MEMS_INDEX = 0;
- private static final int HSFS_INDEX = MEMS_INDEX + 1;
-
- // Used around transition from no storefile to the first.
- private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
-
- // Used to indicate that the scanner has closed (see HBASE-1107)
- private final AtomicBoolean closing = new AtomicBoolean(false);
-
- /** Create a Scanner with a handle on the memcache and HStore files. */
- @SuppressWarnings("unchecked")
- HStoreScanner(HStore store, byte [][] targetCols, byte [] firstRow,
- long timestamp, RowFilterInterface filter)
- throws IOException {
- this.store = store;
- this.dataFilter = filter;
- if (null != dataFilter) {
- dataFilter.reset();
- }
- this.scanners = new InternalScanner[2];
- this.resultSets = new TreeMap[scanners.length];
- this.keys = new HStoreKey[scanners.length];
- // Save these args in case we need them later handling change in readers
- // See updateReaders below.
- this.timestamp = timestamp;
- this.targetCols = targetCols;
-
- try {
- scanners[MEMS_INDEX] =
- store.memcache.getScanner(timestamp, targetCols, firstRow);
- scanners[HSFS_INDEX] =
- new StoreFileScanner(store, timestamp, targetCols, firstRow);
- for (int i = MEMS_INDEX; i < scanners.length; i++) {
- checkScannerFlags(i);
- }
- } catch (IOException e) {
- doClose();
- throw e;
- }
-
- // Advance to the first key in each scanner.
- // All results will match the required column-set and scanTime.
- for (int i = MEMS_INDEX; i < scanners.length; i++) {
- setupScanner(i);
- }
-
- this.store.addChangedReaderObserver(this);
- }
-
- /*
- * @param i Index.
- */
- private void checkScannerFlags(final int i) {
- if (this.scanners[i].isWildcardScanner()) {
- this.wildcardMatch = true;
- }
- if (this.scanners[i].isMultipleMatchScanner()) {
- this.multipleMatchers = true;
- }
- }
-
- /*
- * Do scanner setup.
- * @param i
- * @throws IOException
- */
- private void setupScanner(final int i) throws IOException {
- this.keys[i] = new HStoreKey();
- this.resultSets[i] = new TreeMap(Bytes.BYTES_COMPARATOR);
- if (this.scanners[i] != null && !this.scanners[i].next(this.keys[i], this.resultSets[i])) {
- closeScanner(i);
- }
- }
-
- /** @return true if the scanner is a wild card scanner */
- public boolean isWildcardScanner() {
- return this.wildcardMatch;
- }
-
- /** @return true if the scanner is a multiple match scanner */
- public boolean isMultipleMatchScanner() {
- return this.multipleMatchers;
- }
-
- public boolean next(HStoreKey key, SortedMap results)
- throws IOException {
- this.lock.readLock().lock();
- try {
- // Filtered flag is set by filters. If a cell has been 'filtered out'
- // -- i.e. it is not to be returned to the caller -- the flag is 'true'.
- boolean filtered = true;
- boolean moreToFollow = true;
- while (filtered && moreToFollow) {
- // Find the lowest-possible key.
- byte [] chosenRow = null;
- long chosenTimestamp = -1;
- for (int i = 0; i < this.keys.length; i++) {
- if (scanners[i] != null &&
- (chosenRow == null ||
- (HStoreKey.compareTwoRowKeys(store.getHRegionInfo(),
- keys[i].getRow(), chosenRow) < 0) ||
- ((HStoreKey.compareTwoRowKeys(store.getHRegionInfo(),
- keys[i].getRow(), chosenRow) == 0) &&
- (keys[i].getTimestamp() > chosenTimestamp)))) {
- chosenRow = keys[i].getRow();
- chosenTimestamp = keys[i].getTimestamp();
- }
- }
-
- // Filter whole row by row key?
- filtered = dataFilter != null? dataFilter.filterRowKey(chosenRow) : false;
-
- // Store the key and results for each sub-scanner. Merge them as
- // appropriate.
- if (chosenTimestamp >= 0 && !filtered) {
- // Here we are setting the passed in key with current row+timestamp
- key.setRow(chosenRow);
- key.setVersion(chosenTimestamp);
- key.setColumn(HConstants.EMPTY_BYTE_ARRAY);
- // Keep list of deleted cell keys within this row. We need this
- // because as we go through scanners, the delete record may be in an
- // early scanner and then the same record with a non-delete, non-null
- // value in a later. Without history of what we've seen, we'll return
- // deleted values. This List should not ever grow too large since we
- // are only keeping rows and columns that match those set on the
- // scanner and which have delete values. If memory usage becomes a
- // problem, could redo as bloom filter.
- Set deletes = new HashSet();
- for (int i = 0; i < scanners.length && !filtered; i++) {
- while ((scanners[i] != null
- && !filtered
- && moreToFollow)
- && (HStoreKey.compareTwoRowKeys(store.getHRegionInfo(),
- keys[i].getRow(), chosenRow) == 0)) {
- // If we are doing a wild card match or there are multiple
- // matchers per column, we need to scan all the older versions of
- // this row to pick up the rest of the family members
- if (!wildcardMatch
- && !multipleMatchers
- && (keys[i].getTimestamp() != chosenTimestamp)) {
- break;
- }
-
- // NOTE: We used to do results.putAll(resultSets[i]);
- // but this had the effect of overwriting newer
- // values with older ones. So now we only insert
- // a result if the map does not contain the key.
- HStoreKey hsk = new HStoreKey(key.getRow(),
- HConstants.EMPTY_BYTE_ARRAY,
- key.getTimestamp(), this.store.getHRegionInfo());
- for (Map.Entry e : resultSets[i].entrySet()) {
- hsk.setColumn(e.getKey());
- if (HLogEdit.isDeleted(e.getValue().getValue())) {
- // Only first key encountered is added; deletes is a Set.
- deletes.add(new HStoreKey(hsk));
- } else if (!deletes.contains(hsk) &&
- !filtered &&
- moreToFollow &&
- !results.containsKey(e.getKey())) {
- if (dataFilter != null) {
- // Filter whole row by column data?
- filtered = dataFilter.filterColumn(chosenRow, e.getKey(),
- e.getValue().getValue());
- if (filtered) {
- results.clear();
- break;
- }
- }
- results.put(e.getKey(), e.getValue());
- }
- }
- resultSets[i].clear();
- if (!scanners[i].next(keys[i], resultSets[i])) {
- closeScanner(i);
- }
- }
- }
- }
-
- for (int i = 0; i < scanners.length; i++) {
- // If the current scanner is non-null AND has a lower-or-equal
- // row label, then its timestamp is bad. We need to advance it.
- while ((scanners[i] != null) &&
- (HStoreKey.compareTwoRowKeys(store.getHRegionInfo(),
- keys[i].getRow(), chosenRow) <= 0)) {
- resultSets[i].clear();
- if (!scanners[i].next(keys[i], resultSets[i])) {
- closeScanner(i);
- }
- }
- }
-
- moreToFollow = chosenTimestamp >= 0;
-
- if (dataFilter != null) {
- if (dataFilter.filterAllRemaining()) {
- moreToFollow = false;
- }
- }
-
- if (results.size() <= 0 && !filtered) {
- // There were no results found for this row. Marked it as
- // 'filtered'-out otherwise we will not move on to the next row.
- filtered = true;
- }
- }
-
- // If we got no results, then there is no more to follow.
- if (results == null || results.size() <= 0) {
- moreToFollow = false;
- }
-
- // Make sure scanners closed if no more results
- if (!moreToFollow) {
- for (int i = 0; i < scanners.length; i++) {
- if (null != scanners[i]) {
- closeScanner(i);
- }
- }
- }
-
- return moreToFollow;
- } finally {
- this.lock.readLock().unlock();
- }
- }
-
- /** Shut down a single scanner */
- void closeScanner(int i) {
- try {
- try {
- scanners[i].close();
- } catch (IOException e) {
- LOG.warn(store.storeName + " failed closing scanner " + i, e);
- }
- } finally {
- scanners[i] = null;
- keys[i] = null;
- resultSets[i] = null;
- }
- }
-
- public void close() {
- this.closing.set(true);
- this.store.deleteChangedReaderObserver(this);
- doClose();
- }
-
- private void doClose() {
- for (int i = MEMS_INDEX; i < scanners.length; i++) {
- if (scanners[i] != null) {
- closeScanner(i);
- }
- }
- }
-
- // Implementation of ChangedReadersObserver
-
- public void updateReaders() throws IOException {
- if (this.closing.get()) {
- return;
- }
- this.lock.writeLock().lock();
- try {
- MapFile.Reader [] readers = this.store.getReaders();
- if (this.scanners[HSFS_INDEX] == null && readers != null &&
- readers.length > 0) {
- // Presume that we went from no readers to at least one -- need to put
- // a HStoreScanner in place.
- try {
- // I think its safe getting key from mem at this stage -- it shouldn't have
- // been flushed yet
- this.scanners[HSFS_INDEX] = new StoreFileScanner(this.store,
- this.timestamp, this. targetCols, this.keys[MEMS_INDEX].getRow());
- checkScannerFlags(HSFS_INDEX);
- setupScanner(HSFS_INDEX);
- LOG.debug("Added a StoreFileScanner to outstanding HStoreScanner");
- } catch (IOException e) {
- doClose();
- throw e;
- }
- }
- } finally {
- this.lock.writeLock().unlock();
- }
- }
-}
\ No newline at end of file
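The core of the HStoreScanner.next() loop deleted above is the selection rule it applies across the memcache and store-file sub-scanners: take the lexicographically smallest row, preferring the newer timestamp when rows tie, then drain every sub-scanner positioned on that row. The following is a self-contained sketch of just that rule, with hypothetical names and without the delete tracking and row filtering; it is illustrative, not code from the patch.

// Illustrative sketch of the row-selection rule used by the removed
// HStoreScanner.next(): choose the lexicographically smallest row across
// sub-scanners; on equal rows prefer the larger (newer) timestamp.
public class LowestKeyExample {
  static final class Key {
    final byte[] row;
    final long timestamp;
    Key(byte[] row, long timestamp) { this.row = row; this.timestamp = timestamp; }
  }

  // Plain lexicographic byte comparison, standing in for
  // HStoreKey.compareTwoRowKeys (which no longer needs an HRegionInfo
  // after this patch).
  static int compareRows(byte[] a, byte[] b) {
    int len = Math.min(a.length, b.length);
    for (int i = 0; i < len; i++) {
      int d = (a[i] & 0xff) - (b[i] & 0xff);
      if (d != 0) return d;
    }
    return a.length - b.length;
  }

  /** @return index of the sub-scanner whose current key should be served next, or -1. */
  static int chooseNext(Key[] currentKeys) {
    int chosen = -1;
    for (int i = 0; i < currentKeys.length; i++) {
      Key k = currentKeys[i];
      if (k == null) continue;          // scanner i is exhausted or closed
      if (chosen == -1) { chosen = i; continue; }
      int cmp = compareRows(k.row, currentKeys[chosen].row);
      if (cmp < 0 || (cmp == 0 && k.timestamp > currentKeys[chosen].timestamp)) {
        chosen = i;                     // smaller row, or same row but newer cell
      }
    }
    return chosen;
  }

  public static void main(String[] args) {
    Key[] keys = {
      new Key("row-b".getBytes(), 10L),   // e.g. memcache scanner position
      new Key("row-a".getBytes(), 5L)     // e.g. store file scanner position
    };
    System.out.println(chooseNext(keys)); // prints 1: "row-a" sorts first
  }
}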
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java b/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
index 00e0629..de257a4 100644
--- a/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
@@ -40,7 +40,6 @@ import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.util.Bytes;
@@ -57,8 +56,6 @@ class Memcache {
private static final Log LOG = LogFactory.getLog(Memcache.class);
private final long ttl;
-
- private HRegionInfo regionInfo;
// Note that since these structures are always accessed with a lock held,
// so no additional synchronization is required.
@@ -76,8 +73,6 @@ class Memcache {
*/
public Memcache() {
this.ttl = HConstants.FOREVER;
- // Set default to be the first meta region.
- this.regionInfo = HRegionInfo.FIRST_META_REGIONINFO;
this.memcache = createSynchronizedSortedMap();
this.snapshot = createSynchronizedSortedMap();
}
@@ -87,21 +82,21 @@ class Memcache {
* @param ttl The TTL for cache entries, in milliseconds.
* @param regionInfo The HRI for this cache
*/
- public Memcache(final long ttl, HRegionInfo regionInfo) {
+ public Memcache(final long ttl) {
this.ttl = ttl;
- this.regionInfo = regionInfo;
this.memcache = createSynchronizedSortedMap();
this.snapshot = createSynchronizedSortedMap();
}
/*
* Utility method using HSKWritableComparator
- * @return sycnhronized sorted map of HStoreKey to byte arrays.
+ * @return synchronized sorted map of HStoreKey to byte arrays.
*/
+ @SuppressWarnings("unchecked")
private SortedMap createSynchronizedSortedMap() {
return Collections.synchronizedSortedMap(
new TreeMap(
- new HStoreKey.HStoreKeyWritableComparator(this.regionInfo)));
+ new HStoreKey.HStoreKeyWritableComparator()));
}
/**
@@ -266,7 +261,7 @@ class Memcache {
if (b == null) {
return a;
}
- return HStoreKey.compareTwoRowKeys(regionInfo, a, b) <= 0? a: b;
+ return HStoreKey.compareTwoRowKeys(a, b) <= 0? a: b;
}
/**
@@ -296,12 +291,12 @@ class Memcache {
synchronized (map) {
// Make an HSK with maximum timestamp so we get past most of the current
// rows cell entries.
- HStoreKey hsk = new HStoreKey(row, HConstants.LATEST_TIMESTAMP, this.regionInfo);
+ HStoreKey hsk = new HStoreKey(row, HConstants.LATEST_TIMESTAMP);
SortedMap
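The Memcache hunks above, like the HBaseMapFile and HStoreScanner changes earlier in the patch, all drop the HRegionInfo argument: comparators, row comparisons, and key construction now work on plain byte[] rows. Below is a rough usage sketch of the post-patch signatures as they appear in this diff; it is illustrative only and not part of the patch.

import java.util.Collections;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.util.Bytes;

// Illustrative only: exercises the region-info-free signatures shown in this diff.
public class RegionInfoFreeKeysExample {
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    // The comparator no longer takes an HRegionInfo.
    SortedMap map = Collections.synchronizedSortedMap(
        new TreeMap(new HStoreKey.HStoreKeyWritableComparator()));

    // HStoreKey construction and row comparison without region context.
    HStoreKey hsk = new HStoreKey(Bytes.toBytes("row1"), HConstants.LATEST_TIMESTAMP);
    map.put(hsk, Bytes.toBytes("value"));

    int cmp = HStoreKey.compareTwoRowKeys(Bytes.toBytes("row1"), Bytes.toBytes("row2"));
    System.out.println(cmp < 0); // true: "row1" sorts before "row2"
  }
}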