diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java index 85ebc71..9e576fa 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -37,7 +37,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.io.RawComparator; import org.apache.hbase.Cell; -import org.apache.hbase.cell.CellComparator; +import org.apache.hbase.CellComparator; import com.google.common.primitives.Longs; @@ -918,7 +918,7 @@ public class KeyValue implements Cell, HeapSize { return "empty"; } return keyToString(this.bytes, this.offset + ROW_OFFSET, getKeyLength()) + - "/vlen=" + getValueLength() + "/ts=" + memstoreTS; + "/vlen=" + getValueLength() + "/mvcc=" + memstoreTS; } /** @@ -2289,8 +2289,10 @@ public class KeyValue implements Cell, HeapSize { } /** - * @param in Where to read bytes from - * @return KeyValue created by deserializing from in + * @param in Where to read bytes from. Creates a byte array to hold the KeyValue + * backing bytes copied from the steam. + * @return KeyValue created by deserializing from in OR if we find a length + * of zero, we will return null which can be useful marking a stream as done. * @throws IOException */ public static KeyValue create(final DataInput in) throws IOException { @@ -2301,7 +2303,8 @@ public class KeyValue implements Cell, HeapSize { * Create a KeyValue reading length from in * @param length * @param in - * @return Created KeyValue + * @return Created KeyValue OR if we find a length of zero, we will return null which + * can be useful marking a stream as done. * @throws IOException */ public static KeyValue create(int length, final DataInput in) throws IOException { diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTool.java index a2f75b9..543b5e4 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTool.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTool.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.IterableUtils; import org.apache.hadoop.io.WritableUtils; import org.apache.hbase.Cell; -import org.apache.hbase.cell.CellTool; +import org.apache.hbase.CellTool; /** * static convenience methods for dealing with KeyValues and collections of KeyValues diff --git a/hbase-common/src/main/java/org/apache/hbase/Cell.java b/hbase-common/src/main/java/org/apache/hbase/Cell.java index 8247bab..3a57e8b 100644 --- a/hbase-common/src/main/java/org/apache/hbase/Cell.java +++ b/hbase-common/src/main/java/org/apache/hbase/Cell.java @@ -20,7 +20,6 @@ package org.apache.hbase; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hbase.cell.CellTool; /** diff --git a/hbase-common/src/main/java/org/apache/hbase/CellComparator.java b/hbase-common/src/main/java/org/apache/hbase/CellComparator.java new file mode 100644 index 0000000..f6a3c25 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/CellComparator.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase; + +import java.io.Serializable; +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.primitives.Longs; + +/** + * Compare two traditional HBase cells. + * + * Note: This comparator is not valid for -ROOT- and .META. tables. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class CellComparator implements Comparator, Serializable{ + private static final long serialVersionUID = -8760041766259623329L; + + @Override + public int compare(Cell a, Cell b) { + return compareStatic(a, b); + } + + + public static int compareStatic(Cell a, Cell b) { + //row + int c = Bytes.compareTo( + a.getRowArray(), a.getRowOffset(), a.getRowLength(), + b.getRowArray(), b.getRowOffset(), b.getRowLength()); + if (c != 0) return c; + + //family + c = Bytes.compareTo( + a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), + b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); + if (c != 0) return c; + + //qualifier + c = Bytes.compareTo( + a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), + b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); + if (c != 0) return c; + + //timestamp: later sorts first + c = -Longs.compare(a.getTimestamp(), b.getTimestamp()); + if (c != 0) return c; + + //type + c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte()); + if (c != 0) return c; + + //mvccVersion: later sorts first + return -Longs.compare(a.getMvccVersion(), b.getMvccVersion()); + } + + + /**************** equals ****************************/ + + public static boolean equals(Cell a, Cell b){ + if (!areKeyLengthsEqual(a, b)) { + return false; + } + //TODO compare byte[]'s in reverse since later bytes more likely to differ + return 0 == compareStatic(a, b); + } + + public static boolean equalsRow(Cell a, Cell b){ + if(!areRowLengthsEqual(a, b)){ + return false; + } + return 0 == Bytes.compareTo( + a.getRowArray(), a.getRowOffset(), a.getRowLength(), + b.getRowArray(), b.getRowOffset(), b.getRowLength()); + } + + + /********************* hashCode ************************/ + + /** + * Returns a hash code that is always the same for two Cells having a matching equals(..) result. + * Currently does not guard against nulls, but it could if necessary. 
+ */ + public static int hashCode(Cell cell){ + if (cell == null) {// return 0 for empty Cell + return 0; + } + + //pre-calculate the 3 hashes made of byte ranges + int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); + int familyHash = Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); + int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); + + //combine the 6 sub-hashes + int hash = 31 * rowHash + familyHash; + hash = 31 * hash + qualifierHash; + hash = 31 * hash + (int)cell.getTimestamp(); + hash = 31 * hash + cell.getTypeByte(); + hash = 31 * hash + (int)cell.getMvccVersion(); + return hash; + } + + + /******************** lengths *************************/ + + public static boolean areKeyLengthsEqual(Cell a, Cell b) { + return a.getRowLength() == b.getRowLength() + && a.getFamilyLength() == b.getFamilyLength() + && a.getQualifierLength() == b.getQualifierLength(); + } + + public static boolean areRowLengthsEqual(Cell a, Cell b) { + return a.getRowLength() == b.getRowLength(); + } + + + /***************** special cases ****************************/ + + /** + * special case for KeyValue.equals + */ + private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) { + //row + int c = Bytes.compareTo( + a.getRowArray(), a.getRowOffset(), a.getRowLength(), + b.getRowArray(), b.getRowOffset(), b.getRowLength()); + if (c != 0) return c; + + //family + c = Bytes.compareTo( + a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), + b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); + if (c != 0) return c; + + //qualifier + c = Bytes.compareTo( + a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), + b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); + if (c != 0) return c; + + //timestamp: later sorts first + c = -Longs.compare(a.getTimestamp(), b.getTimestamp()); + if (c != 0) return c; + + //type + c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte()); + return c; + } + + /** + * special case for KeyValue.equals + */ + public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){ + return 0 == compareStaticIgnoreMvccVersion(a, b); + } + +} diff --git a/hbase-common/src/main/java/org/apache/hbase/CellTool.java b/hbase-common/src/main/java/org/apache/hbase/CellTool.java new file mode 100644 index 0000000..96bb509 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/CellTool.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.util.ByteRange; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +public final class CellTool { + + /******************* ByteRange *******************************/ + + public static ByteRange fillRowRange(Cell cell, ByteRange range) { + return range.set(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); + } + + public static ByteRange fillFamilyRange(Cell cell, ByteRange range) { + return range.set(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); + } + + public static ByteRange fillQualifierRange(Cell cell, ByteRange range) { + return range.set(cell.getQualifierArray(), cell.getQualifierOffset(), + cell.getQualifierLength()); + } + + + /***************** get individual arrays for tests ************/ + + public static byte[] getRowArray(Cell cell){ + byte[] output = new byte[cell.getRowLength()]; + copyRowTo(cell, output, 0); + return output; + } + + public static byte[] getFamilyArray(Cell cell){ + byte[] output = new byte[cell.getFamilyLength()]; + copyFamilyTo(cell, output, 0); + return output; + } + + public static byte[] getQualifierArray(Cell cell){ + byte[] output = new byte[cell.getQualifierLength()]; + copyQualifierTo(cell, output, 0); + return output; + } + + public static byte[] getValueArray(Cell cell){ + byte[] output = new byte[cell.getValueLength()]; + copyValueTo(cell, output, 0); + return output; + } + + + /******************** copyTo **********************************/ + + public static int copyRowTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getRowArray(), cell.getRowOffset(), destination, destinationOffset, + cell.getRowLength()); + return destinationOffset + cell.getRowLength(); + } + + public static int copyFamilyTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getFamilyArray(), cell.getFamilyOffset(), destination, destinationOffset, + cell.getFamilyLength()); + return destinationOffset + cell.getFamilyLength(); + } + + public static int copyQualifierTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getQualifierArray(), cell.getQualifierOffset(), destination, + destinationOffset, cell.getQualifierLength()); + return destinationOffset + cell.getQualifierLength(); + } + + public static int copyValueTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getValueArray(), cell.getValueOffset(), destination, destinationOffset, + cell.getValueLength()); + return destinationOffset + cell.getValueLength(); + } + + + /********************* misc *************************************/ + + public static byte getRowByte(Cell cell, int index) { + return cell.getRowArray()[cell.getRowOffset() + index]; + } + + + /********************** KeyValue (move to KeyValueUtils) *********************/ + + public static ByteBuffer getValueBufferShallowCopy(Cell cell) { + ByteBuffer buffer = ByteBuffer.wrap(cell.getValueArray(), cell.getValueOffset(), + cell.getValueLength()); +// buffer.position(buffer.limit());//make it look as if value was appended + return buffer; + } + +} diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java deleted file mode 100644 index 19d6b54..0000000 --- 
a/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.cell; - -import java.io.Serializable; -import java.util.Comparator; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.Cell; - -import com.google.common.primitives.Longs; - -/** - * Compare two traditional HBase cells. - * - * Note: This comparator is not valid for -ROOT- and .META. tables. - */ -@InterfaceAudience.Private -@InterfaceStability.Evolving -public class CellComparator implements Comparator, Serializable{ - private static final long serialVersionUID = -8760041766259623329L; - - @Override - public int compare(Cell a, Cell b) { - return compareStatic(a, b); - } - - - public static int compareStatic(Cell a, Cell b) { - //row - int c = Bytes.compareTo( - a.getRowArray(), a.getRowOffset(), a.getRowLength(), - b.getRowArray(), b.getRowOffset(), b.getRowLength()); - if (c != 0) return c; - - //family - c = Bytes.compareTo( - a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), - b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); - if (c != 0) return c; - - //qualifier - c = Bytes.compareTo( - a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), - b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); - if (c != 0) return c; - - //timestamp: later sorts first - c = -Longs.compare(a.getTimestamp(), b.getTimestamp()); - if (c != 0) return c; - - //type - c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte()); - if (c != 0) return c; - - //mvccVersion: later sorts first - return -Longs.compare(a.getMvccVersion(), b.getMvccVersion()); - } - - - /**************** equals ****************************/ - - public static boolean equals(Cell a, Cell b){ - if (!areKeyLengthsEqual(a, b)) { - return false; - } - //TODO compare byte[]'s in reverse since later bytes more likely to differ - return 0 == compareStatic(a, b); - } - - public static boolean equalsRow(Cell a, Cell b){ - if(!areRowLengthsEqual(a, b)){ - return false; - } - return 0 == Bytes.compareTo( - a.getRowArray(), a.getRowOffset(), a.getRowLength(), - b.getRowArray(), b.getRowOffset(), b.getRowLength()); - } - - - /********************* hashCode ************************/ - - /** - * Returns a hash code that is always the same for two Cells having a matching equals(..) result. - * Currently does not guard against nulls, but it could if necessary. 
- */ - public static int hashCode(Cell cell){ - if (cell == null) {// return 0 for empty Cell - return 0; - } - - //pre-calculate the 3 hashes made of byte ranges - int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); - int familyHash = Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); - int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); - - //combine the 6 sub-hashes - int hash = 31 * rowHash + familyHash; - hash = 31 * hash + qualifierHash; - hash = 31 * hash + (int)cell.getTimestamp(); - hash = 31 * hash + cell.getTypeByte(); - hash = 31 * hash + (int)cell.getMvccVersion(); - return hash; - } - - - /******************** lengths *************************/ - - public static boolean areKeyLengthsEqual(Cell a, Cell b) { - return a.getRowLength() == b.getRowLength() - && a.getFamilyLength() == b.getFamilyLength() - && a.getQualifierLength() == b.getQualifierLength(); - } - - public static boolean areRowLengthsEqual(Cell a, Cell b) { - return a.getRowLength() == b.getRowLength(); - } - - - /***************** special cases ****************************/ - - /** - * special case for KeyValue.equals - */ - private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) { - //row - int c = Bytes.compareTo( - a.getRowArray(), a.getRowOffset(), a.getRowLength(), - b.getRowArray(), b.getRowOffset(), b.getRowLength()); - if (c != 0) return c; - - //family - c = Bytes.compareTo( - a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), - b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); - if (c != 0) return c; - - //qualifier - c = Bytes.compareTo( - a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), - b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); - if (c != 0) return c; - - //timestamp: later sorts first - c = -Longs.compare(a.getTimestamp(), b.getTimestamp()); - if (c != 0) return c; - - //type - c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte()); - return c; - } - - /** - * special case for KeyValue.equals - */ - public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){ - return 0 == compareStaticIgnoreMvccVersion(a, b); - } - -} diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java deleted file mode 100644 index fcaf27e..0000000 --- a/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.cell; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hbase.Cell; - -/** - * Accepts a stream of Cells and adds them to its internal data structure. This can be used to build - * a block of cells during compactions and flushes, or to build a byte[] to send to the client. This - * could be backed by a List, but more efficient implementations will append results to a - * byte[] to eliminate overhead, and possibly encode the cells further. - */ -@InterfaceAudience.Private -@InterfaceStability.Evolving -public interface CellOutputStream { - - /** - * Implementation must copy the entire state of the Cell. If the appended Cell is modified - * immediately after the append method returns, the modifications must have absolutely no effect - * on the copy of the Cell that was added to the appender. For example, calling someList.add(cell) - * is not correct. - */ - void write(Cell cell); - - /** - * Let the implementation decide what to do. Usually means writing accumulated data into a byte[] - * that can then be read from the implementation to be sent to disk, put in the block cache, or - * sent over the network. - */ - void flush(); - -} diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellScannerPosition.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellScannerPosition.java deleted file mode 100644 index eeadf5f..0000000 --- a/hbase-common/src/main/java/org/apache/hbase/cell/CellScannerPosition.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.cell; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - -/** - * An indicator of the state of the scanner after an operation such as nextCell() or positionAt(..). - * For example: - *
- * <ul>
- * <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
- * it should load the next block.</li>
- * <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.</li>
- * <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
- * next region.</li>
- * </ul>
- */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public enum CellScannerPosition { - - /** - * getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first - * cell. - */ - BEFORE_FIRST, - - /** - * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), - * rather it is the nearest cell before the requested cell. - */ - BEFORE, - - /** - * getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by - * positionAt(..). - */ - AT, - - /** - * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), - * rather it is the nearest cell after the requested cell. - */ - AFTER, - - /** - * getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect. - */ - AFTER_LAST - -} diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellTool.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellTool.java deleted file mode 100644 index 229ca36..0000000 --- a/hbase-common/src/main/java/org/apache/hbase/cell/CellTool.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.cell; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hbase.Cell; - -@InterfaceAudience.Private -@InterfaceStability.Evolving -public final class CellTool { - - /******************* ByteRange *******************************/ - - public static ByteRange fillRowRange(Cell cell, ByteRange range) { - return range.set(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); - } - - public static ByteRange fillFamilyRange(Cell cell, ByteRange range) { - return range.set(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); - } - - public static ByteRange fillQualifierRange(Cell cell, ByteRange range) { - return range.set(cell.getQualifierArray(), cell.getQualifierOffset(), - cell.getQualifierLength()); - } - - - /***************** get individual arrays for tests ************/ - - public static byte[] getRowArray(Cell cell){ - byte[] output = new byte[cell.getRowLength()]; - copyRowTo(cell, output, 0); - return output; - } - - public static byte[] getFamilyArray(Cell cell){ - byte[] output = new byte[cell.getFamilyLength()]; - copyFamilyTo(cell, output, 0); - return output; - } - - public static byte[] getQualifierArray(Cell cell){ - byte[] output = new byte[cell.getQualifierLength()]; - copyQualifierTo(cell, output, 0); - return output; - } - - public static byte[] getValueArray(Cell cell){ - byte[] output = new byte[cell.getValueLength()]; - copyValueTo(cell, output, 0); - return output; - } - - - /******************** copyTo **********************************/ - - public static int copyRowTo(Cell cell, byte[] destination, int destinationOffset) { - System.arraycopy(cell.getRowArray(), cell.getRowOffset(), destination, destinationOffset, - cell.getRowLength()); - return destinationOffset + cell.getRowLength(); - } - - public static int copyFamilyTo(Cell cell, byte[] destination, int destinationOffset) { - System.arraycopy(cell.getFamilyArray(), cell.getFamilyOffset(), destination, destinationOffset, - cell.getFamilyLength()); - return destinationOffset + cell.getFamilyLength(); - } - - public static int copyQualifierTo(Cell cell, byte[] destination, int destinationOffset) { - System.arraycopy(cell.getQualifierArray(), cell.getQualifierOffset(), destination, - destinationOffset, cell.getQualifierLength()); - return destinationOffset + cell.getQualifierLength(); - } - - public static int copyValueTo(Cell cell, byte[] destination, int destinationOffset) { - System.arraycopy(cell.getValueArray(), cell.getValueOffset(), destination, destinationOffset, - cell.getValueLength()); - return destinationOffset + cell.getValueLength(); - } - - - /********************* misc *************************************/ - - public static byte getRowByte(Cell cell, int index) { - return cell.getRowArray()[cell.getRowOffset() + index]; - } - - - /********************** KeyValue (move to KeyValueUtils) *********************/ - - public static ByteBuffer getValueBufferShallowCopy(Cell cell) { - ByteBuffer buffer = ByteBuffer.wrap(cell.getValueArray(), cell.getValueOffset(), - cell.getValueLength()); -// buffer.position(buffer.limit());//make it look as if value was appended - return buffer; - } - -} diff --git a/hbase-common/src/main/java/org/apache/hbase/io/CellOutputStream.java b/hbase-common/src/main/java/org/apache/hbase/io/CellOutputStream.java new file mode 100644 index 0000000..661050a 
--- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/CellOutputStream.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.io; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hbase.Cell; + +/** + * Accepts a stream of Cells and adds them to its internal data structure. This can be used to build + * a block of cells during compactions and flushes, or to build a byte[] to send to the client. This + * could be backed by a List, but more efficient implementations will append results to a + * byte[] to eliminate overhead, and possibly encode the cells further. + *

To read Cells, use {@link CellScanner} + * @see CellScanner + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface CellOutputStream { + /** + * Implementation must copy the entire state of the Cell. If the written Cell is modified + * immediately after the write method returns, the modifications must have absolutely no effect + * on the copy of the Cell that was added in the write. + * @param cell Cell to write out + * @throws IOException + */ + void write(Cell cell) throws IOException; + + /** + * Let the implementation decide what to do. Usually means writing accumulated data into a byte[] + * that can then be read from the implementation to be sent to disk, put in the block cache, or + * sent over the network. + * @throws IOException + */ + void flush() throws IOException; +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hbase/io/CellScanner.java b/hbase-common/src/main/java/org/apache/hbase/io/CellScanner.java new file mode 100644 index 0000000..4196aac --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/CellScanner.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.io; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hbase.Cell; + +/** + * Another name for this class would be CellInputStream. + * + *

An interface for iterating through a sequence of cells. Similar to Java's Iterator, but without + * the hasNext() or remove() methods. The hasNext() method is problematic because it may require + * actually loading the next object, which in turn requires storing the previous object somewhere. + * The core data block decoder should be as fast as possible, so we push the complexity and + * performance expense of concurrently tracking multiple cells to layers above the CellScanner. + *

+ * The get() method will return a reference to a Cell implementation. This reference may + * or may not point to a reusable cell implementation, so users of this class should not, for + * example, accumulate a List of Cells. All of the references may point to the same object, which + * would be the latest state of the underlying Cell. In short, the Cell is mutable. + *

+ * At a minimum, an implementation will need to be able to advance from one cell to the next in a + * LinkedList fashion. The nextQualifier(), nextFamily(), and nextRow() methods can all be + * implemented by calling next(), however, if the DataBlockEncoding supports random access into + * the block then it may provide smarter versions of these methods. + *

+ * Typical usage: + * + *

+ * <pre>
+ * while (scanner.read()) {
+ *   Cell cell = scanner.getCurrent();
+ *   // do something
+ * }
+ * </pre>
+ * @see CellOutputStream + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface CellScanner { + /** + * @return the current Cell which may be mutable. Will be null before the first read + * has happened. + */ + Cell getCurrent(); + + /** + * Advance the scanner 1 cell. + * @return true if the next cell is found and getCurrentCell() will return a valid Cell + * @throws IOException + */ + boolean read() throws IOException; +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hbase/io/CellScannerPosition.java b/hbase-common/src/main/java/org/apache/hbase/io/CellScannerPosition.java new file mode 100644 index 0000000..e10c4a9 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/CellScannerPosition.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.io; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * An indicator of the state of the scanner after an operation such as nextCell() or positionAt(..). + * For example: + *
+ * <ul>
+ * <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
+ * it should load the next block.</li>
+ * <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.</li>
+ * <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
+ * next region.</li>
+ * </ul>
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum CellScannerPosition { + + /** + * getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first + * cell. + */ + BEFORE_FIRST, + + /** + * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), + * rather it is the nearest cell before the requested cell. + */ + BEFORE, + + /** + * getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by + * positionAt(..). + */ + AT, + + /** + * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), + * rather it is the nearest cell after the requested cell. + */ + AFTER, + + /** + * getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect. + */ + AFTER_LAST + +} diff --git a/hbase-common/src/main/java/org/apache/hbase/io/CellSearcher.java b/hbase-common/src/main/java/org/apache/hbase/io/CellSearcher.java new file mode 100644 index 0000000..a0174a5 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/CellSearcher.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.io; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hbase.Cell; + +/** + * Search or seek Cells. + * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that + * the implementation is able to navigate between cells without iterating forward through every + * cell. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface CellSearcher extends ReversibleCellScanner { + /** + * Do everything within this scanner's power to find the key. Look forward and backwards. + *

+ * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state. + *

+ * @param key position the CellScanner exactly on this key + * @return true if the cell existed and getCurrentCell() holds a valid cell + */ + boolean positionAt(Cell key); + + /** + * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no + * exact match. + *

+ * @param key position the CellScanner on this key or the closest cell before + * @return AT if exact match
+ * BEFORE if on last cell before key
+ * BEFORE_FIRST if key was before the first cell in this scanner's scope + */ + CellScannerPosition positionAtOrBefore(Cell key); + + /** + * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact + * match. + *

+ * @param key position the CellScanner on this key or the closest cell after + * @return AT if exact match
+ * AFTER if on first cell after key
+ * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition positionAtOrAfter(Cell key); + + /** + * Note: Added for backwards compatibility with {@link #KeyValueScanner.reseek()} + *

+ * Look for the key, but only look after the current position. Probably not needed for an + * efficient tree implementation, but is important for implementations without random access such + * as unencoded KeyValue blocks. + *

+ * @param key position the CellScanner exactly on this key + * @return true if getCurrent() holds a valid cell + */ + boolean seekForwardTo(Cell key); + + /** + * Same as seekForwardTo(..), but go to the extra effort of finding the previous key if there's no + * exact match. + *

+ * @param key + * @return AT if exact match
+ * AFTER if on first cell after key
+ * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition seekForwardToOrBefore(Cell key); + + /** + * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no + * exact match. + *

+ * @param key + * @return AT if exact match
+ * AFTER if on first cell after key
+ * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition seekForwardToOrAfter(Cell key); + + /** + * Note: This may not be appropriate to have in the interface. Need to investigate. + *

+ * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST. + * This is used by tests and for handling certain edge cases. + */ + void positionAfterLastCell(); +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hbase/io/ReversibleCellScanner.java b/hbase-common/src/main/java/org/apache/hbase/io/ReversibleCellScanner.java new file mode 100644 index 0000000..c0ba46b --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/ReversibleCellScanner.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.io; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * An extension of {@link CellScanner} indicating the scanner supports iterating backwards through cells. + *

+ * Note: This was not added to suggest that HBase should support client facing reverse Scanners, but + * because some {@link CellSearcher} implementations, namely PrefixTree, need a method of backing up + * if the positionAt(..) method goes past the requested cell. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface ReversibleCellScanner extends CellScanner { + /** + * Try to position the scanner one Cell before the current position. + * @return true if the operation was successful, meaning getCurrentCell() will return a valid + * Cell.
+ * false if there were no previous cells, meaning getCurrentCell() will return null. + * Scanner position will be {@link CellScannerPosition.BEFORE_FIRST} + */ + boolean previous(); + + /** + * Try to position the scanner in the row before the current row. + * @param endOfRow true for the last cell in the previous row; false for the first cell + * @return true if the operation was successful, meaning getCurrentCell() will return a valid + * Cell.
+ * false if there were no previous cells, meaning getCurrentCell() will return null. + * Scanner position will be {@link CellScannerPosition.BEFORE_FIRST} + */ + boolean previousRow(boolean endOfRow); +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hbase/io/codec/Decoder.java b/hbase-common/src/main/java/org/apache/hbase/io/codec/Decoder.java new file mode 100644 index 0000000..d9cf79b --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/codec/Decoder.java @@ -0,0 +1,15 @@ +package org.apache.hbase.io.codec; + +import org.apache.hbase.Cell; + +/** + * Cell decoder + */ +public interface Decoder { + /** + * Decode next cell. + * @return Next decoded cell or null if finished decoding + * @throws CodecException + */ + Cell decode() throws CodecException; +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hbase/io/codec/Encoder.java b/hbase-common/src/main/java/org/apache/hbase/io/codec/Encoder.java new file mode 100644 index 0000000..e9bef80 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/codec/Encoder.java @@ -0,0 +1,32 @@ +package org.apache.hbase.io.codec; + +import org.apache.hbase.Cell; + +/** + * Cell encoder. + */ +public interface Encoder { + // TODO: org.apache.avro.io.Encoder is like this only does java primitive types, not Cells, + // but it does do 'fixed' bytes.... could serialize Cell and pass these... ugh.. + // wouldn't buy us much. + // TODO: Do a context for encoder and then another for decoder because they + // will have different stuff? Or is that something not in this Interface (I had + // it in here and then removed it all) + /** + * Implementation must copy the entire state of the Cell. If the passed Cell is modified + * immediately after the encode method returns, the modifications must have absolutely no effect + * on the copy of the Cell that was added to the encoder. + * @param cell Cell to encode. + * @return The passed cell + * @throws CodecException + */ + Cell encode(Cell cell) throws CodecException; + + /** + * Finish up the encoding. Add END-OF-ENCODING markers or flush the + * stream, etc. + * You cannot call {@link #encode(Cell)} after invoking this method. + * @throws CodecException + */ + void finish() throws CodecException; +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hbase/io/codec/OldSchoolKeyValueDecoder.java b/hbase-common/src/main/java/org/apache/hbase/io/codec/OldSchoolKeyValueDecoder.java new file mode 100644 index 0000000..2eb72dc --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/codec/OldSchoolKeyValueDecoder.java @@ -0,0 +1,43 @@ +package org.apache.hbase.io.codec; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hbase.Cell; + +/** + * @see OldSchoolKeyValueEncoder + */ +public class OldSchoolKeyValueDecoder implements Decoder { + private final DataInputStream in; + // If true, this decoder is finished decoding. 
+ private boolean finished = false; + + public OldSchoolKeyValueDecoder(final DataInputStream in) { + this.in = in; + } + + @Override + public Cell decode() throws CodecException { + Cell cell = null; + if (!this.finished) { + int length; + try { + length = this.in.readInt(); + } catch (IOException e) { + throw new CodecException(e); + } + if (length == OldSchoolKeyValueEncoder.END_OF_KEYVALUES) { + this.finished = true; + } else { + try { + cell = KeyValue.create(length, this.in); + } catch (IOException e) { + throw new CodecException(e); + } + } + } + return cell; + } +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hbase/io/codec/OldSchoolKeyValueEncoder.java b/hbase-common/src/main/java/org/apache/hbase/io/codec/OldSchoolKeyValueEncoder.java new file mode 100644 index 0000000..dffac45 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/io/codec/OldSchoolKeyValueEncoder.java @@ -0,0 +1,68 @@ +package org.apache.hbase.io.codec; + +import java.io.DataOutputStream; +import java.io.IOException; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hbase.Cell; + +/** + * Encodes by casting Cell to KeyValue and writing out the backing array with a length prefix. + * This is how KVs were serialized in Puts, Deletes and Results pre-0.96. Its what would + * happen if you called the Writable#write KeyValue implementation. This encoder will fail + * if the passed Cell is not an old school pre-0.96 KeyValue. Does not copy bytes writing. + * It just writes them direct to the passed stream. When {@link #finish} is called, + * we write out an End-Of-KeyValues marker to the stream. If you wrote two KeyValues to + * this encoder, it would look like this in the stream: + *

+ * <pre>
+ * length-of-KeyValue1 // A java int with the length of KeyValue1 backing array
+ * KeyValue1 backing array
+ * length-of-KeyValue2
+ * KeyValue2 backing array
+ * length-of-zero // A java int whose value is 0; marks end of the encoded section
+ * </pre>
+ * @see OldSchoolKeyValueDecoder + */ +public class OldSchoolKeyValueEncoder implements Encoder { + // We write out an '0' int as marker that there are no more kvs when you call flush. + static final int END_OF_KEYVALUES = 0; + // Need to be able to write java types such as int and long so need DataOutput. + // Want to stream too so DataOutputStream. + private final DataOutputStream out; + // This encoder is 'done' once flush has been called because on flush we + // write out the END_OF_KEYVALUES marker. + private boolean finish = false; + + public OldSchoolKeyValueEncoder(final DataOutputStream out) { + this.out = out; + } + + @Override + public Cell encode(Cell cell) throws CodecException { + if (this.finish) throw new CodecException("Finished"); + // This is crass and will not work when KV changes + try { + KeyValue.write((KeyValue)cell, this.out); + } catch (IOException e) { + throw new CodecException(e); + } + return cell; + } + + /** + * Calling flush 'finishes' this encoder. Subsequent calls + * to {@link #write(Cell)} will throw exception. + */ + @Override + public void finish() throws CodecException { + if (this.finish) return; + this.finish = true; + // Write out an int whose value is zero as end of stream. + try { + this.out.writeInt(END_OF_KEYVALUES); + this.out.flush(); + } catch (IOException e) { + throw new CodecException(e); + } + } +} \ No newline at end of file
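
The sketches below are reviewer notes, not part of the patch. This first one round-trips two KeyValues through the OldSchoolKeyValueEncoder/OldSchoolKeyValueDecoder pair added above; it assumes the CodecException type that the Encoder and Decoder interfaces reference, and the harness class name is made up.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import org.apache.hbase.io.codec.Decoder;
import org.apache.hbase.io.codec.Encoder;
import org.apache.hbase.io.codec.OldSchoolKeyValueDecoder;
import org.apache.hbase.io.codec.OldSchoolKeyValueEncoder;

public class OldSchoolCodecRoundTrip {
  public static void main(String[] args) throws Exception {
    // Encode two KeyValues; finish() appends the zero-length end-of-keyvalues marker.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    Encoder encoder = new OldSchoolKeyValueEncoder(new DataOutputStream(bytes));
    encoder.encode(new KeyValue(Bytes.toBytes("r1"), Bytes.toBytes("f"), Bytes.toBytes("q"),
        Bytes.toBytes("v1")));
    encoder.encode(new KeyValue(Bytes.toBytes("r2"), Bytes.toBytes("f"), Bytes.toBytes("q"),
        Bytes.toBytes("v2")));
    encoder.finish();

    // Decode until decode() returns null, which signals that the marker was read.
    Decoder decoder = new OldSchoolKeyValueDecoder(
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    for (Cell cell = decoder.decode(); cell != null; cell = decoder.decode()) {
      System.out.println(cell);
    }
  }
}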
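
A second sketch, also assumed rather than part of the patch, exercises the ordering that CellComparator.compareStatic implements: row, then family, then qualifier, with timestamp and mvccVersion sorting later values first. Note that equals(..) and hashCode(..) ignore the value bytes. The example class name is hypothetical.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.CellComparator;

public class CellComparatorExample {
  public static void main(String[] args) {
    byte[] row = Bytes.toBytes("row");
    byte[] fam = Bytes.toBytes("f");
    byte[] qual = Bytes.toBytes("q");
    KeyValue newer = new KeyValue(row, fam, qual, 2L, Bytes.toBytes("v2"));
    KeyValue older = new KeyValue(row, fam, qual, 1L, Bytes.toBytes("v1"));
    KeyValue copy = new KeyValue(row, fam, qual, 2L, Bytes.toBytes("v2"));

    // Later timestamps sort first, so the newer cell compares less than the older one.
    System.out.println(CellComparator.compareStatic(newer, older) < 0); // true

    // equals(..) and hashCode(..) are consistent for matching cells.
    System.out.println(CellComparator.equals(newer, copy)); // true
    System.out.println(CellComparator.hashCode(newer) == CellComparator.hashCode(copy)); // true
  }
}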
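
The CellScanner contract (getCurrent() returns null before the first read(); read() returns false once the cells are exhausted) can be shown with a minimal list-backed implementation. This class is purely illustrative and does not exist in the patch.

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import org.apache.hbase.Cell;
import org.apache.hbase.io.CellScanner;

public class ListCellScanner implements CellScanner {
  private final Iterator<Cell> iterator;
  private Cell current = null;

  public ListCellScanner(List<Cell> cells) {
    this.iterator = cells.iterator();
  }

  @Override
  public Cell getCurrent() {
    // Null until the first successful read() and after the cells run out.
    return this.current;
  }

  @Override
  public boolean read() throws IOException {
    if (!this.iterator.hasNext()) {
      this.current = null;
      return false;
    }
    this.current = this.iterator.next();
    return true;
  }
}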
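
Finally, a sketch of the CellTool copy convention: each copyXxxTo call returns the next write offset, so the key parts can be appended into one flat buffer. Again, the example class is hypothetical.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import org.apache.hbase.CellTool;

public class CellToolExample {
  public static void main(String[] args) {
    Cell cell = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("f"), Bytes.toBytes("q"),
        Bytes.toBytes("value"));
    // Size the buffer for row + family + qualifier, then append each part in turn.
    byte[] flat = new byte[cell.getRowLength() + cell.getFamilyLength()
        + cell.getQualifierLength()];
    int offset = CellTool.copyRowTo(cell, flat, 0);
    offset = CellTool.copyFamilyTo(cell, flat, offset);
    offset = CellTool.copyQualifierTo(cell, flat, offset);
    System.out.println(Bytes.toString(flat)); // prints "rowfq"
  }
}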