diff --git hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java index a6ddc14..d5e84e2 100644 --- hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java +++ hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java @@ -801,6 +801,30 @@ public abstract class Mutation extends OperationWithAttributes implements Row, C return this; } + // Add Cell with input family. It avoids all cloneFamily() and getFamilyArray() + // so no new byte buffer allocaiton/copy happens inside this method. + Mutation add(final byte[] family, Cell cell) throws IOException { + //Checking that the row of the kv is the same as the mutation + // TODO: It is fraught with risk if user pass the wrong row. + // Throwing the IllegalArgumentException is more suitable I'd say. + if (!CellUtil.matchingRows(cell, this.row)) { + throw new WrongRowIOException("The row in " + cell.toString() + + " doesn't match the original one " + Bytes.toStringBinary(this.row)); + } + + if ((family == null) || (family.length == 0)) { + throw new IllegalArgumentException("Family cannot be null"); + } + + if (cell instanceof ExtendedCell) { + getCellList(family).add(cell); + } else { + getCellList(family).add(new CellWrapper(cell)); + } + return this; + } + + private static final class CellWrapper implements ExtendedCell { private static final long FIXED_OVERHEAD = ClassSize.align( ClassSize.OBJECT // object header diff --git hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java index db8eec5..d7c7cff 100644 --- hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java +++ hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java @@ -283,6 +283,12 @@ public class Put extends Mutation implements HeapSize { return this; } + public Put add(final byte[] family, Cell cell) throws IOException { + super.add(family, cell); + return this; + } + + @Override public Put setTimestamp(long timestamp) { super.setTimestamp(timestamp); diff --git hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java index d8d46b6..8ad56c4 100644 --- hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java +++ hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java @@ -26,6 +26,7 @@ import java.nio.ByteBuffer; import java.security.AccessController; import java.security.PrivilegedAction; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Locale; @@ -88,6 +89,7 @@ import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.io.TimeRange; +import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.protobuf.ProtobufMagic; import org.apache.hadoop.hbase.protobuf.ProtobufMessageConverter; import org.apache.hadoop.hbase.quotas.QuotaScope; @@ -406,8 +408,7 @@ public final class ProtobufUtil { * @param proto protocol buffer ServerNameList * @return a list of ServerName */ - public static List toServerNameList( - List proto) { + public static List toServerNameList(List proto) { return proto.stream().map(ProtobufUtil::toServerName) .collect(Collectors.toList()); } @@ -619,6 +620,8 @@ public final class ProtobufUtil { throw new DoNotRetryIOException("Cell count of " + cellCount + " but no cellScanner: " + toShortString(proto)); } + byte[] familyAdded = null, familyFromCell = null; + byte familyLen = 0; for (int i = 0; i < cellCount; i++) { if (!cellScanner.advance()) { throw new DoNotRetryIOException("Cell count of " + cellCount + " but at index " + i + @@ -628,12 +631,37 @@ public final class ProtobufUtil { if (put == null) { put = new Put(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), timestamp); } - put.add(cell); + + // The idea here is to avoid allocating family byte array each time when reading a cell. + // For cells under one family, there are at most two family byte arrays allocated even if + // there are more than 2 qualifiers. + // Each time, it is going to compare the family byte arrays (familyAdded and familyFromCell) + // , it seems that this is extra overhead. However, this overhead is only for different + // families in the put. For cells under one family, it will save the TreeMap lookup + // since it tries to look up the same byte buffer in the TreeMap, which is a shortcircuit. + familyLen = cell.getFamilyLength(); + if (familyFromCell == null) { + familyFromCell = new byte[familyLen]; + } else if (familyFromCell.length != familyLen) { + familyAdded = null; + familyFromCell = new byte[familyLen]; + } + + CellUtil.copyFamilyTo(cell, familyLen, familyFromCell, 0); + + if ((familyAdded == null) || !Arrays.equals(familyAdded, familyFromCell)) { + put.add(familyFromCell, cell); + familyAdded = familyFromCell; + familyFromCell = null; + } else { + put.add(familyAdded, cell); + } } } else { if (put == null) { throw new IllegalArgumentException("row cannot be null"); } + // The proto has the metadata and the data itself ExtendedCellBuilder cellBuilder = ExtendedCellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); for (ColumnValue column: proto.getColumnValueList()) { @@ -651,7 +679,7 @@ public final class ProtobufUtil { if (qv.hasTags()) { allTagsBytes = qv.getTags().toByteArray(); if(qv.hasDeleteType()) { - put.add(cellBuilder.clear() + put.add(family, cellBuilder.clear() .setRow(proto.getRow().toByteArray()) .setFamily(family) .setQualifier(qv.hasQualifier() ? qv.getQualifier().toByteArray() : null) @@ -660,7 +688,7 @@ public final class ProtobufUtil { .setTags(allTagsBytes) .build()); } else { - put.add(cellBuilder.clear() + put.add(family, cellBuilder.clear() .setRow(put.getRow()) .setFamily(family) .setQualifier(qv.hasQualifier() ? qv.getQualifier().toByteArray() : null) @@ -672,7 +700,7 @@ public final class ProtobufUtil { } } else { if(qv.hasDeleteType()) { - put.add(cellBuilder.clear() + put.add(family, cellBuilder.clear() .setRow(put.getRow()) .setFamily(family) .setQualifier(qv.hasQualifier() ? qv.getQualifier().toByteArray() : null) @@ -680,7 +708,7 @@ public final class ProtobufUtil { .setType(fromDeleteType(qv.getDeleteType()).getCode()) .build()); } else{ - put.add(cellBuilder.clear() + put.add(family, cellBuilder.clear() .setRow(put.getRow()) .setFamily(family) .setQualifier(qv.hasQualifier() ? qv.getQualifier().toByteArray() : null) diff --git hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java index c33517b..bb1a4fa 100644 --- hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java +++ hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java @@ -233,10 +233,10 @@ public final class CellUtil { short rowLen = cell.getRowLength(); if (cell instanceof ByteBufferExtendedCell) { ByteBufferUtils.copyFromBufferToBuffer(((ByteBufferExtendedCell) cell).getRowByteBuffer(), - destination, ((ByteBufferExtendedCell) cell).getRowPosition(), destinationOffset, rowLen); + destination, ((ByteBufferExtendedCell) cell).getRowPosition(), destinationOffset, rowLen); } else { ByteBufferUtils.copyFromArrayToBuffer(destination, destinationOffset, cell.getRowArray(), - cell.getRowOffset(), rowLen); + cell.getRowOffset(), rowLen); } return destinationOffset + rowLen; } @@ -277,6 +277,19 @@ public final class CellUtil { return destinationOffset + fLen; } + public static int copyFamilyTo(Cell cell, final byte familyLen, byte[] destination, + int destinationOffset) { + if (cell instanceof ByteBufferExtendedCell) { + ByteBufferUtils.copyFromBufferToArray(destination, + ((ByteBufferExtendedCell) cell).getFamilyByteBuffer(), + ((ByteBufferExtendedCell) cell).getFamilyPosition(), destinationOffset, familyLen); + } else { + System.arraycopy(cell.getFamilyArray(), cell.getFamilyOffset(), destination, + destinationOffset, familyLen); + } + return destinationOffset + familyLen; + } + /** * Copies the family to the given bytebuffer * @param cell the cell whose family has to be copied