From bcda9bebc0692ab47716f1df5837a651f603e937 Mon Sep 17 00:00:00 2001 From: mbautin Date: Fri, 13 Apr 2012 19:38:57 -0700 Subject: [PATCH] [jira] [HBASE-5104] Provide a reliable intra-row pagination mechanism Summary: Porting Madhu's patch for intra-row pagination (rHBASEEIGHTNINEFBBRANCH1326043) to trunk. This is what we have in 89-fb just as a starting point (currently there are test failures). Test Plan: Run unit tests Reviewers: madhuvaidya, lhofhansl, Kannan, tedyu, stack, todd, JIRA Differential Revision: https://reviews.facebook.net/D2799 --- .../java/org/apache/hadoop/hbase/client/Get.java | 58 +++- .../org/apache/hadoop/hbase/client/Result.java | 15 + .../java/org/apache/hadoop/hbase/client/Scan.java | 58 +++- .../apache/hadoop/hbase/protobuf/ProtobufUtil.java | 12 + .../hadoop/hbase/protobuf/RequestConverter.java | 5 + .../hbase/protobuf/generated/ClientProtos.java | 426 +++++++++++++++---- .../hadoop/hbase/regionserver/StoreScanner.java | 33 ++- src/main/protobuf/Client.proto | 4 + .../java/org/apache/hadoop/hbase/HTestConst.java | 27 ++- .../hbase/client/TestIntraRowPagination.java | 102 +++++ .../hbase/client/TestScannersFromClientSide.java | 453 ++++++++++++++++++++ 11 files changed, 1105 insertions(+), 88 deletions(-) create mode 100644 src/test/java/org/apache/hadoop/hbase/client/TestIntraRowPagination.java create mode 100644 src/test/java/org/apache/hadoop/hbase/client/TestScannersFromClientSide.java diff --git src/main/java/org/apache/hadoop/hbase/client/Get.java src/main/java/org/apache/hadoop/hbase/client/Get.java index 2e8b795..6b22f79 100644 --- src/main/java/org/apache/hadoop/hbase/client/Get.java +++ src/main/java/org/apache/hadoop/hbase/client/Get.java @@ -69,12 +69,14 @@ import java.util.TreeSet; @InterfaceStability.Stable public class Get extends OperationWithAttributes implements Writable, Row, Comparable { - private static final byte GET_VERSION = (byte)2; + private static final byte GET_VERSION = (byte)3; private byte [] row = null; private long lockId = -1L; private int maxVersions = 1; private boolean cacheBlocks = true; + private int storeLimit = -1; + private int storeOffset = 0; private Filter filter = null; private TimeRange tr = new TimeRange(); private Map> familyMap = @@ -192,6 +194,26 @@ public class Get extends OperationWithAttributes } /** + * Set the maximum number of values to return per row per Column Family + * @param limit the maximum number of values returned / row / CF + * @return this for invocation chaining + */ + public Get setMaxResultsPerColumnFamily(int limit) { + this.storeLimit = limit; + return this; + } + + /** + * Set offset for the row per Column Family. + * @param offset is the number of kvs that will be skipped. + * @return this for invocation chaining + */ + public Get setRowOffsetPerColumnFamily(int offset) { + this.storeOffset = offset; + return this; + } + + /** * Apply the specified server-side filter when performing the Get. * Only {@link Filter#filterKeyValue(KeyValue)} is called AFTER all tests * for ttl, column match, deletes and max versions have been run. 
@@ -268,6 +290,24 @@ public class Get extends OperationWithAttributes } /** + * Method for retrieving the get's maximum number of values + * to return per Column Family + * @return the maximum number of values to fetch per CF + */ + public int getMaxResultsPerColumnFamily() { + return this.storeLimit; + } + + /** + * Method for retrieving the get's offset per row per column + * family (#kvs to be skipped) + * @return the row offset + */ + public int getRowOffsetPerColumnFamily() { + return this.storeOffset; + } + + /** * Method for retrieving the get's TimeRange * @return timeRange */ @@ -393,6 +433,10 @@ public class Get extends OperationWithAttributes this.row = Bytes.readByteArray(in); this.lockId = in.readLong(); this.maxVersions = in.readInt(); + if (version > 2) { + this.storeLimit = in.readInt(); + this.storeOffset = in.readInt(); + } boolean hasFilter = in.readBoolean(); if (hasFilter) { this.filter = (Filter)createForName(Bytes.toString(Bytes.readByteArray(in))); @@ -423,10 +467,20 @@ public class Get extends OperationWithAttributes public void write(final DataOutput out) throws IOException { - out.writeByte(GET_VERSION); + byte version; + if (this.storeOffset != 0 || this.storeLimit != -1) { + version = GET_VERSION; + } else { + version = (byte)2; + } + out.writeByte(version); Bytes.writeByteArray(out, this.row); out.writeLong(this.lockId); out.writeInt(this.maxVersions); + if (version > 2) { + out.writeInt(this.storeLimit); + out.writeInt(this.storeOffset); + } if(this.filter == null) { out.writeBoolean(false); } else { diff --git src/main/java/org/apache/hadoop/hbase/client/Result.java src/main/java/org/apache/hadoop/hbase/client/Result.java index df0b3ef..7007e57 100644 --- src/main/java/org/apache/hadoop/hbase/client/Result.java +++ src/main/java/org/apache/hadoop/hbase/client/Result.java @@ -168,6 +168,21 @@ public class Result implements Writable, WritableWithSize { } /** + * Returns a sorted array of KeyValues in this Result. + *

+ * Note: Sorting is done in place, so the backing array will be sorted + * after calling this method. + * @return sorted array of KeyValues + */ + public KeyValue[] sorted() { + if (isEmpty()) { + return null; + } + Arrays.sort(kvs, KeyValue.COMPARATOR); + return kvs; + } + + /** * Return the KeyValues for the specific column. The KeyValues are sorted in * the {@link KeyValue#COMPARATOR} order. That implies the first entry in * the list is the most recent column. If the query (Scan or Get) only diff --git src/main/java/org/apache/hadoop/hbase/client/Scan.java src/main/java/org/apache/hadoop/hbase/client/Scan.java index c4004de..c6ced38 100644 --- src/main/java/org/apache/hadoop/hbase/client/Scan.java +++ src/main/java/org/apache/hadoop/hbase/client/Scan.java @@ -88,11 +88,15 @@ public class Scan extends OperationWithAttributes implements Writable { private static final String RAW_ATTR = "_raw_"; private static final String ISOLATION_LEVEL = "_isolationlevel_"; - private static final byte SCAN_VERSION = (byte)2; + private static final byte SCAN_VERSION = (byte)3; private byte [] startRow = HConstants.EMPTY_START_ROW; private byte [] stopRow = HConstants.EMPTY_END_ROW; private int maxVersions = 1; private int batch = -1; + + private int storeLimit = -1; + private int storeOffset = 0; + // If application wants to collect scan metrics, it needs to // call scan.setAttribute(SCAN_ATTRIBUTES_ENABLE, Bytes.toBytes(Boolean.TRUE)) static public String SCAN_ATTRIBUTES_METRICS_ENABLE = @@ -152,6 +156,8 @@ public class Scan extends OperationWithAttributes implements Writable { stopRow = scan.getStopRow(); maxVersions = scan.getMaxVersions(); batch = scan.getBatch(); + storeLimit = scan.getMaxResultsPerColumnFamily(); + storeOffset = scan.getRowOffsetPerColumnFamily(); caching = scan.getCaching(); cacheBlocks = scan.getCacheBlocks(); filter = scan.getFilter(); // clone? @@ -184,6 +190,8 @@ public class Scan extends OperationWithAttributes implements Writable { this.filter = get.getFilter(); this.cacheBlocks = get.getCacheBlocks(); this.maxVersions = get.getMaxVersions(); + this.storeLimit = get.getMaxResultsPerColumnFamily(); + this.storeOffset = get.getRowOffsetPerColumnFamily(); this.tr = get.getTimeRange(); this.familyMap = get.getFamilyMap(); } @@ -317,6 +325,22 @@ public class Scan extends OperationWithAttributes implements Writable { } /** + * Set the maximum number of values to return per row per Column Family + * @param limit the maximum number of values returned / row / CF + */ + public void setMaxResultsPerColumnFamily(int limit) { + this.storeLimit = limit; + } + + /** + * Set offset for the row per Column Family. + * @param offset is the number of kvs that will be skipped. + */ + public void setRowOffsetPerColumnFamily(int offset) { + this.storeOffset = offset; + } + + /** * Set the number of rows for caching that will be passed to scanners. * If not set, the default setting from {@link HTable#getScannerCaching()} will apply. * Higher caching values will enable faster scanners but will use more memory. 
@@ -410,6 +434,22 @@ public class Scan extends OperationWithAttributes implements Writable { } /** + * @return maximum number of values to return per row per CF + */ + public int getMaxResultsPerColumnFamily() { + return this.storeLimit; + } + + /** + * Method for retrieving the scan's offset per row per column + * family (#kvs to be skipped) + * @return row offset + */ + public int getRowOffsetPerColumnFamily() { + return this.storeOffset; + } + + /** * @return caching the number of rows fetched when calling next on a scanner */ public int getCaching() { @@ -561,6 +601,10 @@ public class Scan extends OperationWithAttributes implements Writable { this.stopRow = Bytes.readByteArray(in); this.maxVersions = in.readInt(); this.batch = in.readInt(); + if (version > 2) { + this.storeLimit = in.readInt(); + this.storeOffset = in.readInt(); + } this.caching = in.readInt(); this.cacheBlocks = in.readBoolean(); if(in.readBoolean()) { @@ -590,11 +634,21 @@ public class Scan extends OperationWithAttributes implements Writable { public void write(final DataOutput out) throws IOException { - out.writeByte(SCAN_VERSION); + byte version; + if (this.storeOffset != 0 || this.storeLimit != -1) { + version = SCAN_VERSION; + } else { + version = 2; + } + out.writeByte(version); Bytes.writeByteArray(out, this.startRow); Bytes.writeByteArray(out, this.stopRow); out.writeInt(this.maxVersions); out.writeInt(this.batch); + if (version > 2) { + out.writeInt(this.storeLimit); + out.writeInt(this.storeOffset); + } out.writeInt(this.caching); out.writeBoolean(this.cacheBlocks); if(this.filter == null) { diff --git src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java index b056830..fec6587 100644 --- src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java +++ src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java @@ -284,6 +284,12 @@ public final class ProtobufUtil { if (proto.hasMaxVersions()) { get.setMaxVersions(proto.getMaxVersions()); } + if (proto.hasStoreLimit()) { + get.setMaxResultsPerColumnFamily(proto.getStoreLimit()); + } + if (proto.hasStoreOffset()) { + get.setRowOffsetPerColumnFamily(proto.getStoreOffset()); + } if (proto.hasTimeRange()) { HBaseProtos.TimeRange timeRange = proto.getTimeRange(); long minStamp = 0; @@ -513,6 +519,12 @@ public final class ProtobufUtil { if (proto.hasMaxVersions()) { scan.setMaxVersions(proto.getMaxVersions()); } + if (proto.hasStoreLimit()) { + scan.setMaxResultsPerColumnFamily(proto.getStoreLimit()); + } + if (proto.hasStoreOffset()) { + scan.setRowOffsetPerColumnFamily(proto.getStoreOffset()); + } if (proto.hasTimeRange()) { HBaseProtos.TimeRange timeRange = proto.getTimeRange(); long minStamp = 0; diff --git src/main/java/org/apache/hadoop/hbase/protobuf/RequestConverter.java src/main/java/org/apache/hadoop/hbase/protobuf/RequestConverter.java index a912cc3..327dac0 100644 --- src/main/java/org/apache/hadoop/hbase/protobuf/RequestConverter.java +++ src/main/java/org/apache/hadoop/hbase/protobuf/RequestConverter.java @@ -362,6 +362,8 @@ public final class RequestConverter { scanBuilder.setCacheBlocks(scan.getCacheBlocks()); scanBuilder.setBatchSize(scan.getBatch()); scanBuilder.setMaxVersions(scan.getMaxVersions()); + scanBuilder.setStoreLimit(scan.getMaxResultsPerColumnFamily()); + scanBuilder.setStoreOffset(scan.getRowOffsetPerColumnFamily()); TimeRange timeRange = scan.getTimeRange(); if (!timeRange.isAllTime()) { HBaseProtos.TimeRange.Builder timeRangeBuilder = @@ -617,6 
+619,9 @@ public final class RequestConverter { builder.setRow(ByteString.copyFrom(get.getRow())); builder.setCacheBlocks(get.getCacheBlocks()); builder.setMaxVersions(get.getMaxVersions()); + builder.setStoreLimit(get.getMaxResultsPerColumnFamily()); + builder.setStoreOffset(get.getRowOffsetPerColumnFamily()); + if (get.getLockId() >= 0) { builder.setLockId(get.getLockId()); } diff --git src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java index de820e2..199af50 100644 --- src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java +++ src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java @@ -549,6 +549,14 @@ public final class ClientProtos { // optional bool cacheBlocks = 8 [default = true]; boolean hasCacheBlocks(); boolean getCacheBlocks(); + + // optional int32 storeLimit = 9 [default = -1]; + boolean hasStoreLimit(); + int getStoreLimit(); + + // optional int32 storeOffset = 10 [default = 0]; + boolean hasStoreOffset(); + int getStoreOffset(); } public static final class Get extends com.google.protobuf.GeneratedMessage @@ -687,6 +695,26 @@ public final class ClientProtos { return cacheBlocks_; } + // optional int32 storeLimit = 9 [default = -1]; + public static final int STORELIMIT_FIELD_NUMBER = 9; + private int storeLimit_; + public boolean hasStoreLimit() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + public int getStoreLimit() { + return storeLimit_; + } + + // optional int32 storeOffset = 10 [default = 0]; + public static final int STOREOFFSET_FIELD_NUMBER = 10; + private int storeOffset_; + public boolean hasStoreOffset() { + return ((bitField0_ & 0x00000080) == 0x00000080); + } + public int getStoreOffset() { + return storeOffset_; + } + private void initFields() { row_ = com.google.protobuf.ByteString.EMPTY; column_ = java.util.Collections.emptyList(); @@ -696,6 +724,8 @@ public final class ClientProtos { timeRange_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.TimeRange.getDefaultInstance(); maxVersions_ = 1; cacheBlocks_ = true; + storeLimit_ = -1; + storeOffset_ = 0; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -755,6 +785,12 @@ public final class ClientProtos { if (((bitField0_ & 0x00000020) == 0x00000020)) { output.writeBool(8, cacheBlocks_); } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + output.writeInt32(9, storeLimit_); + } + if (((bitField0_ & 0x00000080) == 0x00000080)) { + output.writeInt32(10, storeOffset_); + } getUnknownFields().writeTo(output); } @@ -796,6 +832,14 @@ public final class ClientProtos { size += com.google.protobuf.CodedOutputStream .computeBoolSize(8, cacheBlocks_); } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(9, storeLimit_); + } + if (((bitField0_ & 0x00000080) == 0x00000080)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(10, storeOffset_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -853,6 +897,16 @@ public final class ClientProtos { result = result && (getCacheBlocks() == other.getCacheBlocks()); } + result = result && (hasStoreLimit() == other.hasStoreLimit()); + if (hasStoreLimit()) { + result = result && (getStoreLimit() + == other.getStoreLimit()); + } + result = result && (hasStoreOffset() == other.hasStoreOffset()); + if (hasStoreOffset()) { + result = result && (getStoreOffset() + == 
other.getStoreOffset()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -894,6 +948,14 @@ public final class ClientProtos { hash = (37 * hash) + CACHEBLOCKS_FIELD_NUMBER; hash = (53 * hash) + hashBoolean(getCacheBlocks()); } + if (hasStoreLimit()) { + hash = (37 * hash) + STORELIMIT_FIELD_NUMBER; + hash = (53 * hash) + getStoreLimit(); + } + if (hasStoreOffset()) { + hash = (37 * hash) + STOREOFFSET_FIELD_NUMBER; + hash = (53 * hash) + getStoreOffset(); + } hash = (29 * hash) + getUnknownFields().hashCode(); return hash; } @@ -1046,6 +1108,10 @@ public final class ClientProtos { bitField0_ = (bitField0_ & ~0x00000040); cacheBlocks_ = true; bitField0_ = (bitField0_ & ~0x00000080); + storeLimit_ = -1; + bitField0_ = (bitField0_ & ~0x00000100); + storeOffset_ = 0; + bitField0_ = (bitField0_ & ~0x00000200); return this; } @@ -1134,6 +1200,14 @@ public final class ClientProtos { to_bitField0_ |= 0x00000020; } result.cacheBlocks_ = cacheBlocks_; + if (((from_bitField0_ & 0x00000100) == 0x00000100)) { + to_bitField0_ |= 0x00000040; + } + result.storeLimit_ = storeLimit_; + if (((from_bitField0_ & 0x00000200) == 0x00000200)) { + to_bitField0_ |= 0x00000080; + } + result.storeOffset_ = storeOffset_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -1220,6 +1294,12 @@ public final class ClientProtos { if (other.hasCacheBlocks()) { setCacheBlocks(other.getCacheBlocks()); } + if (other.hasStoreLimit()) { + setStoreLimit(other.getStoreLimit()); + } + if (other.hasStoreOffset()) { + setStoreOffset(other.getStoreOffset()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -1323,6 +1403,16 @@ public final class ClientProtos { cacheBlocks_ = input.readBool(); break; } + case 72: { + bitField0_ |= 0x00000100; + storeLimit_ = input.readInt32(); + break; + } + case 80: { + bitField0_ |= 0x00000200; + storeOffset_ = input.readInt32(); + break; + } } } } @@ -1968,6 +2058,48 @@ public final class ClientProtos { return this; } + // optional int32 storeLimit = 9 [default = -1]; + private int storeLimit_ = -1; + public boolean hasStoreLimit() { + return ((bitField0_ & 0x00000100) == 0x00000100); + } + public int getStoreLimit() { + return storeLimit_; + } + public Builder setStoreLimit(int value) { + bitField0_ |= 0x00000100; + storeLimit_ = value; + onChanged(); + return this; + } + public Builder clearStoreLimit() { + bitField0_ = (bitField0_ & ~0x00000100); + storeLimit_ = -1; + onChanged(); + return this; + } + + // optional int32 storeOffset = 10 [default = 0]; + private int storeOffset_ ; + public boolean hasStoreOffset() { + return ((bitField0_ & 0x00000200) == 0x00000200); + } + public int getStoreOffset() { + return storeOffset_; + } + public Builder setStoreOffset(int value) { + bitField0_ |= 0x00000200; + storeOffset_ = value; + onChanged(); + return this; + } + public Builder clearStoreOffset() { + bitField0_ = (bitField0_ & ~0x00000200); + storeOffset_ = 0; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:Get) } @@ -8728,6 +8860,14 @@ public final class ClientProtos { // optional uint32 batchSize = 9; boolean hasBatchSize(); int getBatchSize(); + + // optional int32 storeLimit = 10 [default = -1]; + boolean hasStoreLimit(); + int getStoreLimit(); + + // optional int32 storeOffset = 11 [default = 0]; + boolean hasStoreOffset(); + int getStoreOffset(); } public static final class Scan extends com.google.protobuf.GeneratedMessage @@ -8876,6 +9016,26 @@ public final class ClientProtos { return 
batchSize_; } + // optional int32 storeLimit = 10 [default = -1]; + public static final int STORELIMIT_FIELD_NUMBER = 10; + private int storeLimit_; + public boolean hasStoreLimit() { + return ((bitField0_ & 0x00000080) == 0x00000080); + } + public int getStoreLimit() { + return storeLimit_; + } + + // optional int32 storeOffset = 11 [default = 0]; + public static final int STOREOFFSET_FIELD_NUMBER = 11; + private int storeOffset_; + public boolean hasStoreOffset() { + return ((bitField0_ & 0x00000100) == 0x00000100); + } + public int getStoreOffset() { + return storeOffset_; + } + private void initFields() { column_ = java.util.Collections.emptyList(); attribute_ = java.util.Collections.emptyList(); @@ -8886,6 +9046,8 @@ public final class ClientProtos { maxVersions_ = 1; cacheBlocks_ = true; batchSize_ = 0; + storeLimit_ = -1; + storeOffset_ = 0; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -8944,6 +9106,12 @@ public final class ClientProtos { if (((bitField0_ & 0x00000040) == 0x00000040)) { output.writeUInt32(9, batchSize_); } + if (((bitField0_ & 0x00000080) == 0x00000080)) { + output.writeInt32(10, storeLimit_); + } + if (((bitField0_ & 0x00000100) == 0x00000100)) { + output.writeInt32(11, storeOffset_); + } getUnknownFields().writeTo(output); } @@ -8989,6 +9157,14 @@ public final class ClientProtos { size += com.google.protobuf.CodedOutputStream .computeUInt32Size(9, batchSize_); } + if (((bitField0_ & 0x00000080) == 0x00000080)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(10, storeLimit_); + } + if (((bitField0_ & 0x00000100) == 0x00000100)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(11, storeOffset_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -9051,6 +9227,16 @@ public final class ClientProtos { result = result && (getBatchSize() == other.getBatchSize()); } + result = result && (hasStoreLimit() == other.hasStoreLimit()); + if (hasStoreLimit()) { + result = result && (getStoreLimit() + == other.getStoreLimit()); + } + result = result && (hasStoreOffset() == other.hasStoreOffset()); + if (hasStoreOffset()) { + result = result && (getStoreOffset() + == other.getStoreOffset()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -9096,6 +9282,14 @@ public final class ClientProtos { hash = (37 * hash) + BATCHSIZE_FIELD_NUMBER; hash = (53 * hash) + getBatchSize(); } + if (hasStoreLimit()) { + hash = (37 * hash) + STORELIMIT_FIELD_NUMBER; + hash = (53 * hash) + getStoreLimit(); + } + if (hasStoreOffset()) { + hash = (37 * hash) + STOREOFFSET_FIELD_NUMBER; + hash = (53 * hash) + getStoreOffset(); + } hash = (29 * hash) + getUnknownFields().hashCode(); return hash; } @@ -9250,6 +9444,10 @@ public final class ClientProtos { bitField0_ = (bitField0_ & ~0x00000080); batchSize_ = 0; bitField0_ = (bitField0_ & ~0x00000100); + storeLimit_ = -1; + bitField0_ = (bitField0_ & ~0x00000200); + storeOffset_ = 0; + bitField0_ = (bitField0_ & ~0x00000400); return this; } @@ -9342,6 +9540,14 @@ public final class ClientProtos { to_bitField0_ |= 0x00000040; } result.batchSize_ = batchSize_; + if (((from_bitField0_ & 0x00000200) == 0x00000200)) { + to_bitField0_ |= 0x00000080; + } + result.storeLimit_ = storeLimit_; + if (((from_bitField0_ & 0x00000400) == 0x00000400)) { + to_bitField0_ |= 0x00000100; + } + result.storeOffset_ = storeOffset_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -9431,6 
+9637,12 @@ public final class ClientProtos { if (other.hasBatchSize()) { setBatchSize(other.getBatchSize()); } + if (other.hasStoreLimit()) { + setStoreLimit(other.getStoreLimit()); + } + if (other.hasStoreOffset()) { + setStoreOffset(other.getStoreOffset()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -9535,6 +9747,16 @@ public final class ClientProtos { batchSize_ = input.readUInt32(); break; } + case 80: { + bitField0_ |= 0x00000200; + storeLimit_ = input.readInt32(); + break; + } + case 88: { + bitField0_ |= 0x00000400; + storeOffset_ = input.readInt32(); + break; + } } } } @@ -10204,6 +10426,48 @@ public final class ClientProtos { return this; } + // optional int32 storeLimit = 10 [default = -1]; + private int storeLimit_ = -1; + public boolean hasStoreLimit() { + return ((bitField0_ & 0x00000200) == 0x00000200); + } + public int getStoreLimit() { + return storeLimit_; + } + public Builder setStoreLimit(int value) { + bitField0_ |= 0x00000200; + storeLimit_ = value; + onChanged(); + return this; + } + public Builder clearStoreLimit() { + bitField0_ = (bitField0_ & ~0x00000200); + storeLimit_ = -1; + onChanged(); + return this; + } + + // optional int32 storeOffset = 11 [default = 0]; + private int storeOffset_ ; + public boolean hasStoreOffset() { + return ((bitField0_ & 0x00000400) == 0x00000400); + } + public int getStoreOffset() { + return storeOffset_; + } + public Builder setStoreOffset(int value) { + bitField0_ |= 0x00000400; + storeOffset_ = value; + onChanged(); + return this; + } + public Builder clearStoreOffset() { + bitField0_ = (bitField0_ & ~0x00000400); + storeOffset_ = 0; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:Scan) } @@ -20517,89 +20781,91 @@ public final class ClientProtos { static { java.lang.String[] descriptorData = { "\n\014Client.proto\032\013hbase.proto\"+\n\006Column\022\016\n" + - "\006family\030\001 \002(\014\022\021\n\tqualifier\030\002 \003(\014\"\320\001\n\003Get" + + "\006family\030\001 \002(\014\022\021\n\tqualifier\030\002 \003(\014\"\200\002\n\003Get" + "\022\013\n\003row\030\001 \002(\014\022\027\n\006column\030\002 \003(\0132\007.Column\022!" 
+ "\n\tattribute\030\003 \003(\0132\016.NameBytesPair\022\016\n\006loc" + "kId\030\004 \001(\004\022\036\n\006filter\030\005 \001(\0132\016.NameBytesPai" + "r\022\035\n\ttimeRange\030\006 \001(\0132\n.TimeRange\022\026\n\013maxV" + "ersions\030\007 \001(\r:\0011\022\031\n\013cacheBlocks\030\010 \001(\010:\004t" + - "rue\"\037\n\006Result\022\025\n\rkeyValueBytes\030\001 \003(\014\"r\n\n" + - "GetRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpeci" + - "fier\022\021\n\003get\030\002 \002(\0132\004.Get\022\030\n\020closestRowBef", - "ore\030\003 \001(\010\022\025\n\rexistenceOnly\030\004 \001(\010\"6\n\013GetR" + - "esponse\022\027\n\006result\030\001 \001(\0132\007.Result\022\016\n\006exis" + - "ts\030\002 \001(\010\"\200\002\n\tCondition\022\013\n\003row\030\001 \002(\014\022\016\n\006f" + - "amily\030\002 \002(\014\022\021\n\tqualifier\030\003 \002(\014\022+\n\013compar" + - "eType\030\004 \002(\0162\026.Condition.CompareType\022\"\n\nc" + - "omparator\030\005 \002(\0132\016.NameBytesPair\"r\n\013Compa" + - "reType\022\010\n\004LESS\020\000\022\021\n\rLESS_OR_EQUAL\020\001\022\t\n\005E" + - "QUAL\020\002\022\r\n\tNOT_EQUAL\020\003\022\024\n\020GREATER_OR_EQUA" + - "L\020\004\022\013\n\007GREATER\020\005\022\t\n\005NO_OP\020\006\"\306\004\n\006Mutate\022\013" + - "\n\003row\030\001 \002(\014\022&\n\nmutateType\030\002 \002(\0162\022.Mutate", - ".MutateType\022(\n\013columnValue\030\003 \003(\0132\023.Mutat" + - "e.ColumnValue\022!\n\tattribute\030\004 \003(\0132\016.NameB" + - "ytesPair\022\021\n\ttimestamp\030\005 \001(\004\022\016\n\006lockId\030\006 " + - "\001(\004\022\030\n\nwriteToWAL\030\007 \001(\010:\004true\022\035\n\ttimeRan" + - "ge\030\n \001(\0132\n.TimeRange\032\310\001\n\013ColumnValue\022\016\n\006" + - "family\030\001 \002(\014\022:\n\016qualifierValue\030\002 \003(\0132\".M" + - "utate.ColumnValue.QualifierValue\032m\n\016Qual" + - "ifierValue\022\021\n\tqualifier\030\001 \001(\014\022\r\n\005value\030\002" + - " \001(\014\022\021\n\ttimestamp\030\003 \001(\004\022&\n\ndeleteType\030\004 " + - "\001(\0162\022.Mutate.DeleteType\"<\n\nMutateType\022\n\n", - "\006APPEND\020\000\022\r\n\tINCREMENT\020\001\022\007\n\003PUT\020\002\022\n\n\006DEL" + - "ETE\020\003\"U\n\nDeleteType\022\026\n\022DELETE_ONE_VERSIO" + - "N\020\000\022\034\n\030DELETE_MULTIPLE_VERSIONS\020\001\022\021\n\rDEL" + - "ETE_FAMILY\020\002\"i\n\rMutateRequest\022 \n\006region\030" + - "\001 \002(\0132\020.RegionSpecifier\022\027\n\006mutate\030\002 \002(\0132" + - "\007.Mutate\022\035\n\tcondition\030\003 \001(\0132\n.Condition\"" + - "<\n\016MutateResponse\022\027\n\006result\030\001 \001(\0132\007.Resu" + - "lt\022\021\n\tprocessed\030\002 \001(\010\"\352\001\n\004Scan\022\027\n\006column" + - "\030\001 \003(\0132\007.Column\022!\n\tattribute\030\002 \003(\0132\016.Nam" + - "eBytesPair\022\020\n\010startRow\030\003 \001(\014\022\017\n\007stopRow\030", - "\004 \001(\014\022\036\n\006filter\030\005 \001(\0132\016.NameBytesPair\022\035\n" + - "\ttimeRange\030\006 \001(\0132\n.TimeRange\022\026\n\013maxVersi" + - "ons\030\007 \001(\r:\0011\022\031\n\013cacheBlocks\030\010 \001(\010:\004true\022" + - "\021\n\tbatchSize\030\t \001(\r\"\203\001\n\013ScanRequest\022 \n\006re" + - "gion\030\001 \001(\0132\020.RegionSpecifier\022\023\n\004scan\030\002 \001" + - "(\0132\005.Scan\022\021\n\tscannerId\030\003 \001(\004\022\024\n\014numberOf" + - "Rows\030\004 \001(\r\022\024\n\014closeScanner\030\005 \001(\010\"\\\n\014Scan" + - "Response\022\027\n\006result\030\001 \003(\0132\007.Result\022\021\n\tsca" + - 
"nnerId\030\002 \001(\004\022\023\n\013moreResults\030\003 \001(\010\022\013\n\003ttl" + - "\030\004 \001(\r\"?\n\016LockRowRequest\022 \n\006region\030\001 \002(\013", - "2\020.RegionSpecifier\022\013\n\003row\030\002 \003(\014\".\n\017LockR" + - "owResponse\022\016\n\006lockId\030\001 \002(\004\022\013\n\003ttl\030\002 \001(\r\"" + - "D\n\020UnlockRowRequest\022 \n\006region\030\001 \002(\0132\020.Re" + - "gionSpecifier\022\016\n\006lockId\030\002 \002(\004\"\023\n\021UnlockR" + - "owResponse\"\232\001\n\024BulkLoadHFileRequest\022 \n\006r" + - "egion\030\001 \002(\0132\020.RegionSpecifier\0224\n\nfamilyP" + - "ath\030\002 \003(\0132 .BulkLoadHFileRequest.FamilyP" + - "ath\032*\n\nFamilyPath\022\016\n\006family\030\001 \002(\014\022\014\n\004pat" + - "h\030\002 \002(\t\"\'\n\025BulkLoadHFileResponse\022\016\n\006load" + - "ed\030\001 \002(\010\"\203\001\n\004Exec\022\013\n\003row\030\001 \002(\014\022\024\n\014protoc", - "olName\030\002 \002(\t\022\022\n\nmethodName\030\003 \002(\t\022!\n\010prop" + - "erty\030\004 \003(\0132\017.NameStringPair\022!\n\tparameter" + - "\030\005 \003(\0132\016.NameBytesPair\"O\n\026ExecCoprocesso" + - "rRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpecifi" + - "er\022\023\n\004call\030\002 \002(\0132\005.Exec\"8\n\027ExecCoprocess" + - "orResponse\022\035\n\005value\030\001 \002(\0132\016.NameBytesPai" + - "r\"P\n\014ActionResult\022\035\n\005value\030\001 \001(\0132\016.NameB" + - "ytesPair\022!\n\texception\030\002 \001(\0132\016.NameBytesP" + - "air\"`\n\014MultiRequest\022 \n\006region\030\001 \002(\0132\020.Re" + - "gionSpecifier\022\036\n\006action\030\002 \003(\0132\016.NameByte", - "sPair\022\016\n\006atomic\030\003 \001(\010\".\n\rMultiResponse\022\035" + - "\n\006result\030\001 \003(\0132\r.ActionResult2\221\003\n\rClient" + - "Service\022 \n\003get\022\013.GetRequest\032\014.GetRespons" + - "e\022)\n\006mutate\022\016.MutateRequest\032\017.MutateResp" + - "onse\022#\n\004scan\022\014.ScanRequest\032\r.ScanRespons" + - "e\022,\n\007lockRow\022\017.LockRowRequest\032\020.LockRowR" + - "esponse\0222\n\tunlockRow\022\021.UnlockRowRequest\032" + - "\022.UnlockRowResponse\022>\n\rbulkLoadHFile\022\025.B" + - "ulkLoadHFileRequest\032\026.BulkLoadHFileRespo" + - "nse\022D\n\017execCoprocessor\022\027.ExecCoprocessor", - "Request\032\030.ExecCoprocessorResponse\022&\n\005mul" + - "ti\022\r.MultiRequest\032\016.MultiResponseBB\n*org" + - ".apache.hadoop.hbase.protobuf.generatedB" + - "\014ClientProtosH\001\210\001\001\240\001\001" + "rue\022\026\n\nstoreLimit\030\t \001(\005:\002-1\022\026\n\013storeOffs" + + "et\030\n \001(\005:\0010\"\037\n\006Result\022\025\n\rkeyValueBytes\030\001" + + " \003(\014\"r\n\nGetRequest\022 \n\006region\030\001 \002(\0132\020.Reg", + "ionSpecifier\022\021\n\003get\030\002 \002(\0132\004.Get\022\030\n\020close" + + "stRowBefore\030\003 \001(\010\022\025\n\rexistenceOnly\030\004 \001(\010" + + "\"6\n\013GetResponse\022\027\n\006result\030\001 \001(\0132\007.Result" + + "\022\016\n\006exists\030\002 \001(\010\"\200\002\n\tCondition\022\013\n\003row\030\001 " + + "\002(\014\022\016\n\006family\030\002 \002(\014\022\021\n\tqualifier\030\003 \002(\014\022+" + + "\n\013compareType\030\004 \002(\0162\026.Condition.CompareT" + + "ype\022\"\n\ncomparator\030\005 \002(\0132\016.NameBytesPair\"" + + "r\n\013CompareType\022\010\n\004LESS\020\000\022\021\n\rLESS_OR_EQUA" + + "L\020\001\022\t\n\005EQUAL\020\002\022\r\n\tNOT_EQUAL\020\003\022\024\n\020GREATER" + + 
"_OR_EQUAL\020\004\022\013\n\007GREATER\020\005\022\t\n\005NO_OP\020\006\"\306\004\n\006", + "Mutate\022\013\n\003row\030\001 \002(\014\022&\n\nmutateType\030\002 \002(\0162" + + "\022.Mutate.MutateType\022(\n\013columnValue\030\003 \003(\013" + + "2\023.Mutate.ColumnValue\022!\n\tattribute\030\004 \003(\013" + + "2\016.NameBytesPair\022\021\n\ttimestamp\030\005 \001(\004\022\016\n\006l" + + "ockId\030\006 \001(\004\022\030\n\nwriteToWAL\030\007 \001(\010:\004true\022\035\n" + + "\ttimeRange\030\n \001(\0132\n.TimeRange\032\310\001\n\013ColumnV" + + "alue\022\016\n\006family\030\001 \002(\014\022:\n\016qualifierValue\030\002" + + " \003(\0132\".Mutate.ColumnValue.QualifierValue" + + "\032m\n\016QualifierValue\022\021\n\tqualifier\030\001 \001(\014\022\r\n" + + "\005value\030\002 \001(\014\022\021\n\ttimestamp\030\003 \001(\004\022&\n\ndelet", + "eType\030\004 \001(\0162\022.Mutate.DeleteType\"<\n\nMutat" + + "eType\022\n\n\006APPEND\020\000\022\r\n\tINCREMENT\020\001\022\007\n\003PUT\020" + + "\002\022\n\n\006DELETE\020\003\"U\n\nDeleteType\022\026\n\022DELETE_ON" + + "E_VERSION\020\000\022\034\n\030DELETE_MULTIPLE_VERSIONS\020" + + "\001\022\021\n\rDELETE_FAMILY\020\002\"i\n\rMutateRequest\022 \n" + + "\006region\030\001 \002(\0132\020.RegionSpecifier\022\027\n\006mutat" + + "e\030\002 \002(\0132\007.Mutate\022\035\n\tcondition\030\003 \001(\0132\n.Co" + + "ndition\"<\n\016MutateResponse\022\027\n\006result\030\001 \001(" + + "\0132\007.Result\022\021\n\tprocessed\030\002 \001(\010\"\232\002\n\004Scan\022\027" + + "\n\006column\030\001 \003(\0132\007.Column\022!\n\tattribute\030\002 \003", + "(\0132\016.NameBytesPair\022\020\n\010startRow\030\003 \001(\014\022\017\n\007" + + "stopRow\030\004 \001(\014\022\036\n\006filter\030\005 \001(\0132\016.NameByte" + + "sPair\022\035\n\ttimeRange\030\006 \001(\0132\n.TimeRange\022\026\n\013" + + "maxVersions\030\007 \001(\r:\0011\022\031\n\013cacheBlocks\030\010 \001(" + + "\010:\004true\022\021\n\tbatchSize\030\t \001(\r\022\026\n\nstoreLimit" + + "\030\n \001(\005:\002-1\022\026\n\013storeOffset\030\013 \001(\005:\0010\"\203\001\n\013S" + + "canRequest\022 \n\006region\030\001 \001(\0132\020.RegionSpeci" + + "fier\022\023\n\004scan\030\002 \001(\0132\005.Scan\022\021\n\tscannerId\030\003" + + " \001(\004\022\024\n\014numberOfRows\030\004 \001(\r\022\024\n\014closeScann" + + "er\030\005 \001(\010\"\\\n\014ScanResponse\022\027\n\006result\030\001 \003(\013", + "2\007.Result\022\021\n\tscannerId\030\002 \001(\004\022\023\n\013moreResu" + + "lts\030\003 \001(\010\022\013\n\003ttl\030\004 \001(\r\"?\n\016LockRowRequest" + + "\022 \n\006region\030\001 \002(\0132\020.RegionSpecifier\022\013\n\003ro" + + "w\030\002 \003(\014\".\n\017LockRowResponse\022\016\n\006lockId\030\001 \002" + + "(\004\022\013\n\003ttl\030\002 \001(\r\"D\n\020UnlockRowRequest\022 \n\006r" + + "egion\030\001 \002(\0132\020.RegionSpecifier\022\016\n\006lockId\030" + + "\002 \002(\004\"\023\n\021UnlockRowResponse\"\232\001\n\024BulkLoadH" + + "FileRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpec" + + "ifier\0224\n\nfamilyPath\030\002 \003(\0132 .BulkLoadHFil" + + "eRequest.FamilyPath\032*\n\nFamilyPath\022\016\n\006fam", + "ily\030\001 \002(\014\022\014\n\004path\030\002 \002(\t\"\'\n\025BulkLoadHFile" + + "Response\022\016\n\006loaded\030\001 \002(\010\"\203\001\n\004Exec\022\013\n\003row" + + "\030\001 \002(\014\022\024\n\014protocolName\030\002 \002(\t\022\022\n\nmethodNa" + + "me\030\003 \002(\t\022!\n\010property\030\004 \003(\0132\017.NameStringP" + + 
"air\022!\n\tparameter\030\005 \003(\0132\016.NameBytesPair\"O" + + "\n\026ExecCoprocessorRequest\022 \n\006region\030\001 \002(\013" + + "2\020.RegionSpecifier\022\023\n\004call\030\002 \002(\0132\005.Exec\"" + + "8\n\027ExecCoprocessorResponse\022\035\n\005value\030\001 \002(" + + "\0132\016.NameBytesPair\"P\n\014ActionResult\022\035\n\005val" + + "ue\030\001 \001(\0132\016.NameBytesPair\022!\n\texception\030\002 ", + "\001(\0132\016.NameBytesPair\"`\n\014MultiRequest\022 \n\006r" + + "egion\030\001 \002(\0132\020.RegionSpecifier\022\036\n\006action\030" + + "\002 \003(\0132\016.NameBytesPair\022\016\n\006atomic\030\003 \001(\010\".\n" + + "\rMultiResponse\022\035\n\006result\030\001 \003(\0132\r.ActionR" + + "esult2\221\003\n\rClientService\022 \n\003get\022\013.GetRequ" + + "est\032\014.GetResponse\022)\n\006mutate\022\016.MutateRequ" + + "est\032\017.MutateResponse\022#\n\004scan\022\014.ScanReque" + + "st\032\r.ScanResponse\022,\n\007lockRow\022\017.LockRowRe" + + "quest\032\020.LockRowResponse\0222\n\tunlockRow\022\021.U" + + "nlockRowRequest\032\022.UnlockRowResponse\022>\n\rb", + "ulkLoadHFile\022\025.BulkLoadHFileRequest\032\026.Bu" + + "lkLoadHFileResponse\022D\n\017execCoprocessor\022\027" + + ".ExecCoprocessorRequest\032\030.ExecCoprocesso" + + "rResponse\022&\n\005multi\022\r.MultiRequest\032\016.Mult" + + "iResponseBB\n*org.apache.hadoop.hbase.pro" + + "tobuf.generatedB\014ClientProtosH\001\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -20619,7 +20885,7 @@ public final class ClientProtos { internal_static_Get_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_Get_descriptor, - new java.lang.String[] { "Row", "Column", "Attribute", "LockId", "Filter", "TimeRange", "MaxVersions", "CacheBlocks", }, + new java.lang.String[] { "Row", "Column", "Attribute", "LockId", "Filter", "TimeRange", "MaxVersions", "CacheBlocks", "StoreLimit", "StoreOffset", }, org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Get.class, org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Get.Builder.class); internal_static_Result_descriptor = @@ -20699,7 +20965,7 @@ public final class ClientProtos { internal_static_Scan_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_Scan_descriptor, - new java.lang.String[] { "Column", "Attribute", "StartRow", "StopRow", "Filter", "TimeRange", "MaxVersions", "CacheBlocks", "BatchSize", }, + new java.lang.String[] { "Column", "Attribute", "StartRow", "StopRow", "Filter", "TimeRange", "MaxVersions", "CacheBlocks", "BatchSize", "StoreLimit", "StoreOffset", }, org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Scan.class, org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Scan.Builder.class); internal_static_ScanRequest_descriptor = diff --git src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 919d814..52086ce 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -50,6 +50,9 @@ class StoreScanner extends NonLazyKeyValueScanner private KeyValueHeap heap; private boolean cacheBlocks; + private int countPerRow = 0; + private int storeLimit = -1; + private int storeOffset = 0; private String metricNamePrefix; // Used to 
indicate that the scanner has closed (see HBASE-1107) @@ -132,6 +135,12 @@ class StoreScanner extends NonLazyKeyValueScanner } } + // set storeLimit + this.storeLimit = scan.getMaxResultsPerColumnFamily(); + + // set rowOffset + this.storeOffset = scan.getRowOffsetPerColumnFamily(); + // Combine all seeked scanners with a heap heap = new KeyValueHeap(scanners, store.comparator); @@ -341,6 +350,7 @@ class StoreScanner extends NonLazyKeyValueScanner // only call setRow if the row changes; avoids confusing the query matcher // if scanning intra-row if ((matcher.row == null) || !peeked.matchingRow(matcher.row)) { + this.countPerRow = 0; matcher.setRow(peeked.getRow()); } @@ -370,11 +380,27 @@ class StoreScanner extends NonLazyKeyValueScanner if (f != null) { kv = f.transform(kv); } - results.add(kv); - if (metric != null) { - HRegion.incrNumericMetric(this.metricNamePrefix + metric, + this.countPerRow++; + if (storeLimit > -1 && + this.countPerRow > (storeLimit + storeOffset)) { + // do what SEEK_NEXT_ROW does. + if (!matcher.moreRowsMayExistAfter(kv)) { + outResult.addAll(results); + return false; + } + reseek(matcher.getKeyForNextRow(kv)); + break LOOP; + } + + // add to results only if we have skipped #storeOffset kvs + // also update metric accordingly + if (this.countPerRow > storeOffset) { + if (metric != null) { + HRegion.incrNumericMetric(this.metricNamePrefix + metric, kv.getLength()); + } + results.add(kv); } if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) { @@ -532,6 +558,7 @@ class StoreScanner extends NonLazyKeyValueScanner kv = lastTopKey; } if ((matcher.row == null) || !kv.matchingRow(matcher.row)) { + this.countPerRow = 0; matcher.reset(); matcher.setRow(kv.getRow()); } diff --git src/main/protobuf/Client.proto src/main/protobuf/Client.proto index a7a19e0..12cceed 100644 --- src/main/protobuf/Client.proto +++ src/main/protobuf/Client.proto @@ -46,6 +46,8 @@ message Get { optional TimeRange timeRange = 6; optional uint32 maxVersions = 7 [default = 1]; optional bool cacheBlocks = 8 [default = true]; + optional int32 storeLimit = 9 [default = -1]; + optional int32 storeOffset = 10 [default = 0]; } /** @@ -194,6 +196,8 @@ message Scan { optional uint32 maxVersions = 7 [default = 1]; optional bool cacheBlocks = 8 [default = true]; optional uint32 batchSize = 9; + optional int32 storeLimit = 10 [default = -1]; + optional int32 storeOffset = 11 [default = 0]; } /** diff --git src/test/java/org/apache/hadoop/hbase/HTestConst.java src/test/java/org/apache/hadoop/hbase/HTestConst.java index 62d0079..4bcca41 100644 --- src/test/java/org/apache/hadoop/hbase/HTestConst.java +++ src/test/java/org/apache/hadoop/hbase/HTestConst.java @@ -23,7 +23,10 @@ import java.util.Collections; import org.apache.hadoop.hbase.util.Bytes; -/** Similar to {@link HConstants} but for tests. */ +/** + * Similar to {@link HConstants} but for tests. Also provides some simple + * static utility functions to generate test data. 
+ */ public class HTestConst { private HTestConst() { @@ -39,4 +42,26 @@ public class HTestConst { Collections.unmodifiableSet(new HashSet( Arrays.asList(new String[] { DEFAULT_CF_STR }))); + public static final String DEFAULT_ROW_STR = "MyTestRow"; + public static final byte[] DEFAULT_ROW_BYTES = Bytes.toBytes(DEFAULT_ROW_STR); + + public static final String DEFAULT_QUALIFIER_STR = "MyColumnQualifier"; + public static final byte[] DEFAULT_QUALIFIER_BYTES = Bytes.toBytes(DEFAULT_QUALIFIER_STR); + + public static String DEFAULT_VALUE_STR = "MyTestValue"; + public static byte[] DEFAULT_VALUE_BYTES = Bytes.toBytes(DEFAULT_VALUE_STR); + + /** + * Generate the given number of unique byte sequences by appending numeric + * suffixes (ASCII representations of decimal numbers). + */ + public static byte[][] makeNAscii(byte[] base, int n) { + byte [][] ret = new byte[n][]; + for (int i = 0; i < n; i++) { + byte[] tail = Bytes.toBytes(Integer.toString(i)); + ret[i] = Bytes.add(base, tail); + } + return ret; + } + } diff --git src/test/java/org/apache/hadoop/hbase/client/TestIntraRowPagination.java src/test/java/org/apache/hadoop/hbase/client/TestIntraRowPagination.java new file mode 100644 index 0000000..3c54f88 --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/client/TestIntraRowPagination.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.client; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.HTestConst; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.RegionScanner; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test scan/get offset and limit settings within one row through HRegion API. 
+ */ +@Category(SmallTests.class) +public class TestIntraRowPagination { + + private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + /** + * Test from client side for scan with maxResultPerCF set + * + * @throws Exception + */ + @Test + public void testScanLimitAndOffset() throws Exception { + //byte [] TABLE = HTestConst.DEFAULT_TABLE_BYTES; + byte [][] ROWS = HTestConst.makeNAscii(HTestConst.DEFAULT_ROW_BYTES, 2); + byte [][] FAMILIES = HTestConst.makeNAscii(HTestConst.DEFAULT_CF_BYTES, 3); + byte [][] QUALIFIERS = HTestConst.makeNAscii(HTestConst.DEFAULT_QUALIFIER_BYTES, 10); + + HTableDescriptor htd = new HTableDescriptor(HTestConst.DEFAULT_TABLE_BYTES); + HRegionInfo info = new HRegionInfo(HTestConst.DEFAULT_TABLE_BYTES, null, null, false); + for (byte[] family : FAMILIES) { + HColumnDescriptor hcd = new HColumnDescriptor(family); + htd.addFamily(hcd); + } + HRegion region = + HRegion.createHRegion(info, TEST_UTIL.getDataTestDir(), TEST_UTIL.getConfiguration(), htd); + + Put put; + Scan scan; + Result result; + boolean toLog = true; + + List kvListExp = new ArrayList(); + + int storeOffset = 1; + int storeLimit = 3; + for (int r = 0; r < ROWS.length; r++) { + put = new Put(ROWS[r]); + for (int c = 0; c < FAMILIES.length; c++) { + for (int q = 0; q < QUALIFIERS.length; q++) { + KeyValue kv = + new KeyValue(ROWS[r], FAMILIES[c], QUALIFIERS[q], 1, HTestConst.DEFAULT_VALUE_BYTES); + put.add(kv); + if (storeOffset <= q && q < storeOffset + storeLimit) { + kvListExp.add(kv); + } + } + } + region.put(put); + } + + scan = new Scan(); + scan.setRowOffsetPerColumnFamily(storeOffset); + scan.setMaxResultsPerColumnFamily(storeLimit); + RegionScanner scanner = region.getScanner(scan); + List kvListScan = new ArrayList(); + List results = new ArrayList(); + while (scanner.next(results) || !results.isEmpty()) { + kvListScan.addAll(results); + results.clear(); + } + result = new Result(kvListScan); + TestScannersFromClientSide.verifyResult(result, kvListExp, toLog, + "Testing scan with storeOffset and storeLimit"); + } + +} diff --git src/test/java/org/apache/hadoop/hbase/client/TestScannersFromClientSide.java src/test/java/org/apache/hadoop/hbase/client/TestScannersFromClientSide.java new file mode 100644 index 0000000..981f59d --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/client/TestScannersFromClientSide.java @@ -0,0 +1,453 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HTestConst; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.filter.ColumnPrefixFilter; +import org.apache.hadoop.hbase.filter.ColumnRangeFilter; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * A client-side test, mostly testing scanners with various parameters. + */ +@Category(MediumTests.class) +public class TestScannersFromClientSide { + private static final Log LOG = LogFactory.getLog(TestScannersFromClientSide.class); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static byte [] ROW = Bytes.toBytes("testRow"); + private static byte [] FAMILY = Bytes.toBytes("testFamily"); + private static byte [] QUALIFIER = Bytes.toBytes("testQualifier"); + private static byte [] VALUE = Bytes.toBytes("testValue"); + + /** + * @throws java.lang.Exception + */ + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.startMiniCluster(3); + } + + /** + * @throws java.lang.Exception + */ + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + /** + * @throws java.lang.Exception + */ + @Before + public void setUp() throws Exception { + // Nothing to do. + } + + /** + * @throws java.lang.Exception + */ + @After + public void tearDown() throws Exception { + // Nothing to do. + } + + /** + * Test from client side for batch of scan + * + * @throws Exception + */ + @Test + public void testScanBatch() throws Exception { + byte [] TABLE = Bytes.toBytes("testScanBatch"); + byte [][] QUALIFIERS = HTestConst.makeNAscii(QUALIFIER, 8); + + HTable ht = TEST_UTIL.createTable(TABLE, FAMILY); + + Put put; + Scan scan; + Delete delete; + Result result; + ResultScanner scanner; + boolean toLog = true; + List kvListExp; + + // table: row, family, c0:0, c1:1, ... 
, c7:7 + put = new Put(ROW); + for (int i=0; i < QUALIFIERS.length; i++) { + KeyValue kv = new KeyValue(ROW, FAMILY, QUALIFIERS[i], i, VALUE); + put.add(kv); + } + ht.put(put); + + // table: row, family, c0:0, c1:1, ..., c6:2, c6:6 , c7:7 + put = new Put(ROW); + KeyValue kv = new KeyValue(ROW, FAMILY, QUALIFIERS[6], 2, VALUE); + put.add(kv); + ht.put(put); + + // delete upto ts: 3 + delete = new Delete(ROW); + delete.deleteFamily(FAMILY, 3); + ht.delete(delete); + + // without batch + scan = new Scan(ROW); + scan.setMaxVersions(); + scanner = ht.getScanner(scan); + + // c4:4, c5:5, c6:6, c7:7 + kvListExp = new ArrayList(); + kvListExp.add(new KeyValue(ROW, FAMILY, QUALIFIERS[4], 4, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILY, QUALIFIERS[5], 5, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILY, QUALIFIERS[6], 6, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILY, QUALIFIERS[7], 7, VALUE)); + result = scanner.next(); + verifyResult(result, kvListExp, toLog, "Testing first batch of scan"); + + // with batch + scan = new Scan(ROW); + scan.setMaxVersions(); + scan.setBatch(2); + scanner = ht.getScanner(scan); + + // First batch: c4:4, c5:5 + kvListExp = new ArrayList(); + kvListExp.add(new KeyValue(ROW, FAMILY, QUALIFIERS[4], 4, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILY, QUALIFIERS[5], 5, VALUE)); + result = scanner.next(); + verifyResult(result, kvListExp, toLog, "Testing first batch of scan"); + + // Second batch: c6:6, c7:7 + kvListExp = new ArrayList(); + kvListExp.add(new KeyValue(ROW, FAMILY, QUALIFIERS[6], 6, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILY, QUALIFIERS[7], 7, VALUE)); + result = scanner.next(); + verifyResult(result, kvListExp, toLog, "Testing second batch of scan"); + + } + + /** + * Test from client side for get with maxResultPerCF set + * + * @throws Exception + */ + @Test + public void testGetMaxResults() throws Exception { + byte [] TABLE = Bytes.toBytes("testGetMaxResults"); + byte [][] FAMILIES = HTestConst.makeNAscii(FAMILY, 3); + byte [][] QUALIFIERS = HTestConst.makeNAscii(QUALIFIER, 20); + + HTable ht = TEST_UTIL.createTable(TABLE, FAMILIES); + + Get get; + Put put; + Result result; + boolean toLog = true; + List kvListExp; + + kvListExp = new ArrayList(); + // Insert one CF for row[0] + put = new Put(ROW); + for (int i=0; i < 10; i++) { + KeyValue kv = new KeyValue(ROW, FAMILIES[0], QUALIFIERS[i], 1, VALUE); + put.add(kv); + kvListExp.add(kv); + } + ht.put(put); + + get = new Get(ROW); + result = ht.get(get); + verifyResult(result, kvListExp, toLog, "Testing without setting maxResults"); + + get = new Get(ROW); + get.setMaxResultsPerColumnFamily(2); + result = ht.get(get); + kvListExp = new ArrayList(); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[0], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[1], 1, VALUE)); + verifyResult(result, kvListExp, toLog, "Testing basic setMaxResults"); + + // Filters: ColumnRangeFilter + get = new Get(ROW); + get.setMaxResultsPerColumnFamily(5); + get.setFilter(new ColumnRangeFilter(QUALIFIERS[2], true, QUALIFIERS[5], + true)); + result = ht.get(get); + kvListExp = new ArrayList(); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[2], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[3], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[4], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[5], 1, VALUE)); + verifyResult(result, kvListExp, toLog, "Testing single CF with CRF"); + + // Insert two more CF for 
row[0] + // 20 columns for CF2, 10 columns for CF1 + put = new Put(ROW); + for (int i=0; i < QUALIFIERS.length; i++) { + KeyValue kv = new KeyValue(ROW, FAMILIES[2], QUALIFIERS[i], 1, VALUE); + put.add(kv); + } + ht.put(put); + + put = new Put(ROW); + for (int i=0; i < 10; i++) { + KeyValue kv = new KeyValue(ROW, FAMILIES[1], QUALIFIERS[i], 1, VALUE); + put.add(kv); + } + ht.put(put); + + get = new Get(ROW); + get.setMaxResultsPerColumnFamily(12); + get.addFamily(FAMILIES[1]); + get.addFamily(FAMILIES[2]); + result = ht.get(get); + kvListExp = new ArrayList(); + //Exp: CF1:q0, ..., q9, CF2: q0, q1, q10, q11, ..., q19 + for (int i=0; i < 10; i++) { + kvListExp.add(new KeyValue(ROW, FAMILIES[1], QUALIFIERS[i], 1, VALUE)); + } + for (int i=0; i < 2; i++) { + kvListExp.add(new KeyValue(ROW, FAMILIES[2], QUALIFIERS[i], 1, VALUE)); + } + for (int i=10; i < 20; i++) { + kvListExp.add(new KeyValue(ROW, FAMILIES[2], QUALIFIERS[i], 1, VALUE)); + } + verifyResult(result, kvListExp, toLog, "Testing multiple CFs"); + + // Filters: ColumnRangeFilter and ColumnPrefixFilter + get = new Get(ROW); + get.setMaxResultsPerColumnFamily(3); + get.setFilter(new ColumnRangeFilter(QUALIFIERS[2], true, null, true)); + result = ht.get(get); + kvListExp = new ArrayList(); + for (int i=2; i < 5; i++) { + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[i], 1, VALUE)); + } + for (int i=2; i < 5; i++) { + kvListExp.add(new KeyValue(ROW, FAMILIES[1], QUALIFIERS[i], 1, VALUE)); + } + for (int i=2; i < 5; i++) { + kvListExp.add(new KeyValue(ROW, FAMILIES[2], QUALIFIERS[i], 1, VALUE)); + } + verifyResult(result, kvListExp, toLog, "Testing multiple CFs + CRF"); + + get = new Get(ROW); + get.setMaxResultsPerColumnFamily(7); + get.setFilter(new ColumnPrefixFilter(QUALIFIERS[1])); + result = ht.get(get); + kvListExp = new ArrayList(); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[1], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[1], QUALIFIERS[1], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[2], QUALIFIERS[1], 1, VALUE)); + for (int i=10; i < 16; i++) { + kvListExp.add(new KeyValue(ROW, FAMILIES[2], QUALIFIERS[i], 1, VALUE)); + } + verifyResult(result, kvListExp, toLog, "Testing multiple CFs + PFF"); + + } + + /** + * Test from client side for scan with maxResultPerCF set + * + * @throws Exception + */ + @Test + public void testScanMaxResults() throws Exception { + byte [] TABLE = Bytes.toBytes("testScanLimit"); + byte [][] ROWS = HTestConst.makeNAscii(ROW, 2); + byte [][] FAMILIES = HTestConst.makeNAscii(FAMILY, 3); + byte [][] QUALIFIERS = HTestConst.makeNAscii(QUALIFIER, 10); + + HTable ht = TEST_UTIL.createTable(TABLE, FAMILIES); + + Put put; + Scan scan; + Result result; + boolean toLog = true; + List kvListExp, kvListScan; + + kvListExp = new ArrayList(); + + for (int r=0; r < ROWS.length; r++) { + put = new Put(ROWS[r]); + for (int c=0; c < FAMILIES.length; c++) { + for (int q=0; q < QUALIFIERS.length; q++) { + KeyValue kv = new KeyValue(ROWS[r], FAMILIES[c], QUALIFIERS[q], 1, VALUE); + put.add(kv); + if (q < 4) { + kvListExp.add(kv); + } + } + } + ht.put(put); + } + + scan = new Scan(); + scan.setMaxResultsPerColumnFamily(4); + ResultScanner scanner = ht.getScanner(scan); + kvListScan = new ArrayList(); + while ((result = scanner.next()) != null) { + for (KeyValue kv : result.list()) { + kvListScan.add(kv); + } + } + result = new Result(kvListScan); + verifyResult(result, kvListExp, toLog, "Testing scan with maxResults"); + + } + + /** + * Test from client side for get with rowOffset 
+ * + * @throws Exception + */ + @Test + public void testGetRowOffset() throws Exception { + byte [] TABLE = Bytes.toBytes("testGetRowOffset"); + byte [][] FAMILIES = HTestConst.makeNAscii(FAMILY, 3); + byte [][] QUALIFIERS = HTestConst.makeNAscii(QUALIFIER, 20); + + HTable ht = TEST_UTIL.createTable(TABLE, FAMILIES); + + Get get; + Put put; + Result result; + boolean toLog = true; + List kvListExp; + + // Insert one CF for row + kvListExp = new ArrayList(); + put = new Put(ROW); + for (int i=0; i < 10; i++) { + KeyValue kv = new KeyValue(ROW, FAMILIES[0], QUALIFIERS[i], 1, VALUE); + put.add(kv); + // skipping first two kvs + if (i < 2) continue; + kvListExp.add(kv); + } + ht.put(put); + + //setting offset to 2 + get = new Get(ROW); + get.setRowOffsetPerColumnFamily(2); + result = ht.get(get); + verifyResult(result, kvListExp, toLog, "Testing basic setRowOffset"); + + //setting offset to 20 + get = new Get(ROW); + get.setRowOffsetPerColumnFamily(20); + result = ht.get(get); + kvListExp = new ArrayList(); + verifyResult(result, kvListExp, toLog, "Testing offset > #kvs"); + + //offset + maxResultPerCF + get = new Get(ROW); + get.setRowOffsetPerColumnFamily(4); + get.setMaxResultsPerColumnFamily(5); + result = ht.get(get); + kvListExp = new ArrayList(); + for (int i=4; i < 9; i++) { + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[i], 1, VALUE)); + } + verifyResult(result, kvListExp, toLog, + "Testing offset + setMaxResultsPerCF"); + + // Filters: ColumnRangeFilter + get = new Get(ROW); + get.setRowOffsetPerColumnFamily(1); + get.setFilter(new ColumnRangeFilter(QUALIFIERS[2], true, QUALIFIERS[5], + true)); + result = ht.get(get); + kvListExp = new ArrayList(); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[3], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[4], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[0], QUALIFIERS[5], 1, VALUE)); + verifyResult(result, kvListExp, toLog, "Testing offset with CRF"); + + // Insert into two more CFs for row + // 10 columns for CF2, 10 columns for CF1 + for(int j=2; j > 0; j--) { + put = new Put(ROW); + for (int i=0; i < 10; i++) { + KeyValue kv = new KeyValue(ROW, FAMILIES[j], QUALIFIERS[i], 1, VALUE); + put.add(kv); + } + ht.put(put); + } + + get = new Get(ROW); + get.setRowOffsetPerColumnFamily(4); + get.setMaxResultsPerColumnFamily(2); + get.addFamily(FAMILIES[1]); + get.addFamily(FAMILIES[2]); + result = ht.get(get); + kvListExp = new ArrayList(); + //Exp: CF1:q4, q5, CF2: q4, q5 + kvListExp.add(new KeyValue(ROW, FAMILIES[1], QUALIFIERS[4], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[1], QUALIFIERS[5], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[2], QUALIFIERS[4], 1, VALUE)); + kvListExp.add(new KeyValue(ROW, FAMILIES[2], QUALIFIERS[5], 1, VALUE)); + verifyResult(result, kvListExp, toLog, + "Testing offset + multiple CFs + maxResults"); + + } + + static void verifyResult(Result result, List expKvList, boolean toLog, + String msg) { + + LOG.info(msg); + LOG.info("Expected count: " + expKvList.size()); + LOG.info("Actual count: " + result.size()); + if (expKvList.size() == 0) + return; + + int i = 0; + for (KeyValue kv : result.sorted()) { + if (i >= expKvList.size()) { + break; // we will check the size later + } + + KeyValue kvExp = expKvList.get(i++); + if (toLog) { + LOG.info("get kv is: " + kv.toString()); + LOG.info("exp kv is: " + kvExp.toString()); + } + assertTrue("Not equal", kvExp.equals(kv)); + } + + assertEquals(expKvList.size(), result.size()); + } + +} -- 1.7.4.4
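
Example usage (a minimal client-side sketch of the API this patch adds; the table name "myTable", row "myRow", and column family are hypothetical and assume a reachable cluster — they are not part of the patch):

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.ResultScanner;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.util.Bytes;

    public class IntraRowPaginationExample {
      public static void main(String[] args) throws Exception {
        // Hypothetical table; substitute an existing table and column family.
        HTable table = new HTable(HBaseConfiguration.create(), "myTable");
        try {
          // Get one "page" of a wide row: skip the first 10 KVs of each
          // column family and return at most 5 of the remaining ones.
          Get get = new Get(Bytes.toBytes("myRow"));
          get.setRowOffsetPerColumnFamily(10);
          get.setMaxResultsPerColumnFamily(5);
          Result page = table.get(get);

          // The same limit/offset on a Scan applies independently to every
          // row and every column family the scan touches.
          Scan scan = new Scan();
          scan.setRowOffsetPerColumnFamily(10);
          scan.setMaxResultsPerColumnFamily(5);
          ResultScanner scanner = table.getScanner(scan);
          for (Result r : scanner) {
            // each Result carries at most 5 KVs per column family
          }
          scanner.close();
        } finally {
          table.close();
        }
      }
    }

The offset and limit are enforced on the region server (see the StoreScanner change above: KVs are counted per row, those before the offset are skipped, and once offset + limit KVs have been seen the scanner seeks to the next row), so the next page of a row is fetched by issuing a new Get/Scan with a larger offset rather than by filtering on the client. On the Writable path, Get and Scan only write the new version-3 format (with the two extra ints) when a non-default limit or offset is set, so clients that do not use the feature remain wire-compatible with older servers.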