From e53f2b6f27b1d532b9a561e6a603670ce9482e24 Mon Sep 17 00:00:00 2001 From: eshcar Date: Thu, 24 Sep 2015 16:47:04 +0300 Subject: [PATCH] HBASE-13408 minor fixes after rebase --- .../hbase/mob/mapreduce/MemStoreWrapper.java | 6 +- .../procedure/flush/FlushTableSubprocedure.java | 9 +- .../hbase/regionserver/AbstractMemStore.java | 528 ++ .../apache/hadoop/hbase/regionserver/CellSet.java | 185 + .../hadoop/hbase/regionserver/CellSkipListSet.java | 185 - .../hbase/regionserver/CompactedMemStore.java | 334 ++ .../hbase/regionserver/CompactionPipeline.java | 226 + .../hadoop/hbase/regionserver/DefaultMemStore.java | 862 +-- .../hbase/regionserver/FlushAllStoresPolicy.java | 6 +- .../hbase/regionserver/FlushLargeStoresPolicy.java | 15 +- .../hadoop/hbase/regionserver/FlushPolicy.java | 35 +- .../hadoop/hbase/regionserver/FlushRequester.java | 4 +- .../hadoop/hbase/regionserver/HMobStore.java | 24 +- .../apache/hadoop/hbase/regionserver/HRegion.java | 417 +- .../hadoop/hbase/regionserver/HRegionServer.java | 91 +- .../apache/hadoop/hbase/regionserver/HStore.java | 110 +- .../hbase/regionserver/ImmutableSegment.java | 64 + .../regionserver/ImmutableSegmentAdapter.java | 93 + .../hadoop/hbase/regionserver/LogRoller.java | 7 +- .../apache/hadoop/hbase/regionserver/MemStore.java | 14 +- .../hbase/regionserver/MemStoreCompactor.java | 236 + .../hadoop/hbase/regionserver/MemStoreFlusher.java | 17 +- .../hadoop/hbase/regionserver/MemStoreScanner.java | 317 + .../hbase/regionserver/MemStoreSnapshot.java | 13 +- .../hbase/regionserver/MutableCellSetSegment.java | 227 + .../regionserver/MutableCellSetSegmentScanner.java | 433 ++ .../hadoop/hbase/regionserver/MutableSegment.java | 57 + .../hadoop/hbase/regionserver/RSRpcServices.java | 133 +- .../apache/hadoop/hbase/regionserver/Region.java | 31 +- .../hbase/regionserver/RegionServerAccounting.java | 15 +- .../apache/hadoop/hbase/regionserver/Store.java | 27 +- .../hbase/regionserver/StoreFlushContext.java | 11 +- .../hadoop/hbase/regionserver/StoreSegment.java | 154 + .../hbase/regionserver/StoreSegmentFactory.java | 86 + .../hbase/regionserver/StoreSegmentScanner.java | 44 + .../hbase/regionserver/VersionedSegmentsList.java | 54 + .../hadoop/hbase/regionserver/wal/FSHLog.java | 14 + .../regionserver/wal/SequenceIdAccounting.java | 44 +- .../hadoop/hbase/wal/DisabledWALProvider.java | 26 +- .../main/java/org/apache/hadoop/hbase/wal/WAL.java | 37 +- .../apache/hadoop/hbase/HBaseTestingUtility.java | 13 + .../hadoop/hbase/TestGlobalMemStoreSize.java | 16 +- .../org/apache/hadoop/hbase/TestIOFencing.java | 32 +- .../org/apache/hadoop/hbase/io/TestHeapSize.java | 49 +- .../hbase/regionserver/TestCellSkipListSet.java | 6 +- .../hbase/regionserver/TestCompactedMemStore.java | 1394 +++++ .../hbase/regionserver/TestDefaultMemStore.java | 140 +- .../hbase/regionserver/TestFlushRegionEntry.java | 6 +- .../hadoop/hbase/regionserver/TestHMobStore.java | 34 +- .../hadoop/hbase/regionserver/TestHRegion.java | 97 +- .../regionserver/TestHRegionWithInMemoryFlush.java | 6236 ++++++++++++++++++++ .../hbase/regionserver/TestHeapMemoryManager.java | 42 +- .../hbase/regionserver/TestMemStoreChunkPool.java | 35 +- .../hbase/regionserver/TestReversibleScanners.java | 25 +- .../hbase/regionserver/TestSplitWalDataLoss.java | 2 +- .../hadoop/hbase/regionserver/TestStore.java | 50 +- .../TestWalAndCompactedMemstoreFlush.java | 505 ++ .../hbase/regionserver/wal/TestWALReplay.java | 26 +- 58 files changed, 12125 insertions(+), 1774 deletions(-) create mode 100644 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/AbstractMemStore.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSkipListSet.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactedMemStore.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionPipeline.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ImmutableSegment.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ImmutableSegmentAdapter.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreCompactor.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreScanner.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableCellSetSegment.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableCellSetSegmentScanner.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableSegment.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegment.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegmentFactory.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegmentScanner.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/VersionedSegmentsList.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactedMemStore.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionWithInMemoryFlush.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWalAndCompactedMemstoreFlush.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/mapreduce/MemStoreWrapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/mapreduce/MemStoreWrapper.java index fdda1de..2b7aa6f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/mapreduce/MemStoreWrapper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/mapreduce/MemStoreWrapper.java @@ -18,8 +18,6 @@ */ package org.apache.hadoop.hbase.mob.mapreduce; -import java.io.IOException; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -48,6 +46,8 @@ import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapreduce.Reducer.Context; +import java.io.IOException; + /** * The wrapper of a DefaultMemStore. 
* This wrapper is used in the sweep reducer to buffer and sort the cells written from @@ -112,7 +112,7 @@ public class MemStoreWrapper { * @throws IOException */ public void flushMemStore() throws IOException { - MemStoreSnapshot snapshot = memstore.snapshot(); + MemStoreSnapshot snapshot = memstore.snapshot(0); internalFlushCache(snapshot); memstore.clearSnapshot(snapshot.getId()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/FlushTableSubprocedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/FlushTableSubprocedure.java index 5723919..baa280e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/FlushTableSubprocedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/FlushTableSubprocedure.java @@ -17,9 +17,6 @@ */ package org.apache.hadoop.hbase.procedure.flush; -import java.util.List; -import java.util.concurrent.Callable; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; @@ -30,6 +27,9 @@ import org.apache.hadoop.hbase.procedure.Subprocedure; import org.apache.hadoop.hbase.procedure.flush.RegionServerFlushTableProcedureManager.FlushTableSubprocedurePool; import org.apache.hadoop.hbase.regionserver.Region; +import java.util.List; +import java.util.concurrent.Callable; + /** * This flush region implementation uses the distributed procedure framework to flush * table regions. @@ -65,7 +65,8 @@ public class FlushTableSubprocedure extends Subprocedure { region.startRegionOperation(); try { LOG.debug("Flush region " + region.toString() + " started..."); - region.flush(true); + boolean forceFlushInsteadOfCompaction = false; + region.flush(true,forceFlushInsteadOfCompaction); } finally { LOG.debug("Closing region operation on " + region); region.closeRegionOperation(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/AbstractMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/AbstractMemStore.java new file mode 100644 index 0000000..2cd7ad5 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/AbstractMemStore.java @@ -0,0 +1,528 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Pair; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableSet; +import java.util.SortedSet; + +/** + * An abstract class, which implements the behaviour shared by all concrete memstore instances. + */ +@InterfaceAudience.Private +public abstract class AbstractMemStore implements MemStore { + + private final Configuration conf; + private final CellComparator comparator; + + // active segment absorbs write operations + volatile private MutableSegment active; + // Snapshot of memstore. Made for flusher. + volatile private ImmutableSegment snapshot; + volatile long snapshotId; + // Used to track when to flush + volatile private long timeOfOldestEdit; + + public final static long FIXED_OVERHEAD = ClassSize.align( + ClassSize.OBJECT + + (4 * ClassSize.REFERENCE) + + (2 * Bytes.SIZEOF_LONG)); + + public final static long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD + + 2 * (ClassSize.ATOMIC_LONG + ClassSize.TIMERANGE_TRACKER + + ClassSize.CELL_SKIPLIST_SET + ClassSize.CONCURRENT_SKIPLISTMAP)); + + + protected AbstractMemStore(final Configuration conf, final CellComparator c) { + this.conf = conf; + this.comparator = c; + resetCellSet(); + this.snapshot = StoreSegmentFactory.instance().createImmutableSegment(conf, c, 0); + + } + + protected void resetCellSet() { + // Reset heap to not include any keys + this.active = StoreSegmentFactory.instance().createMutableSegment( + conf, comparator, deepOverhead()); + this.timeOfOldestEdit = Long.MAX_VALUE; + } + + /* + * Calculate how the MemStore size has changed. Includes overhead of the + * backing Map. + * @param cell + * @param notpresent True if the cell was NOT present in the set. + * @return change in size + */ + static long heapSizeChange(final Cell cell, final boolean notpresent) { + return notpresent ? ClassSize.align(ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY + + CellUtil.estimatedHeapSizeOf(cell)) : 0; + } + + /** + * Sets the force flush to disk mode on. + * @return this memstore + */ + public abstract AbstractMemStore setForceFlushToDisk(); + + /** + * Returns true if the force flush to disk mode is set + * @return true if the force flush to disk mode is set + */ + abstract boolean isForceFlushToDisk(); + + + /** + * Returns true if an in-memory compaction is in progress + * @return true if an in-memory compaction is in progress + */ + public abstract boolean isMemStoreInCompaction(); + + /** + * Flushes the active segment into a different (immutable) memory segments. 
+ * @param flushOpSeqId the sequence id used by the wal to mark this flush + */ + public abstract void flushInMemory(long flushOpSeqId); + + /** + * Updates the wal with the lowest sequence id (oldest entry) that is still in memory + * @param onlyIfGreater a flag that marks whether to update the sequence id no matter what or + * only if it is greater than the previous sequence id + */ + public abstract void updateLowestUnflushedSequenceIdInWal(boolean onlyIfGreater); + + //method for tests + /** + * Returns true if the memstore supports in-memory compaction + * @return true if the memstore supports in-memory compaction + */ + public abstract boolean isCompactedMemStore(); + + boolean shouldFlushInMemory() { + return !isForceFlushToDisk(); + } + + protected long deepOverhead() { + return DEEP_OVERHEAD; + } + + /** + * Write an update + * @param cell + * @return approximate size of the passed cell & newly added cell which maybe different than the + * passed-in cell + */ + @Override + public Pair add(Cell cell) { + Cell toAdd = maybeCloneWithAllocator(cell); + return new Pair(internalAdd(toAdd), toAdd); + } + + /** + * Update or insert the specified KeyValues. + *
+ * <p>
+ * For each KeyValue, insert into MemStore. This will atomically upsert the + * value for that row/family/qualifier. If a KeyValue did already exist, + * it will then be removed. + *
+ * <p>
+ * Currently the memstoreTS is kept at 0 so as each insert happens, it will + * be immediately visible. May want to change this so it is atomic across + * all KeyValues. + *
+ * <p>
+ * This is called under row lock, so Get operations will still see updates + * atomically. Scans will only see each KeyValue update as atomic. + * + * @param cells + * @param readpoint readpoint below which we can safely remove duplicate KVs + * @return change in memstore size + */ + @Override + public long upsert(Iterable cells, long readpoint) { + long size = 0; + for (Cell cell : cells) { + size += upsert(cell, readpoint); + } + return size; + } + + /** + * @return Oldest timestamp of all the Cells in the MemStore + */ + @Override + public long timeOfOldestEdit() { + return timeOfOldestEdit; + } + + + /** + * Write a delete + * @param deleteCell + * @return approximate size of the passed key and value. + */ + @Override + public long delete(Cell deleteCell) { + Cell toAdd = maybeCloneWithAllocator(deleteCell); + long s = internalAdd(toAdd); + return s; + } + + /** + * The passed snapshot was successfully persisted; it can be let go. + * @param id Id of the snapshot to clean out. + * @throws UnexpectedStateException + * @see MemStore#snapshot(long) + */ + @Override + public void clearSnapshot(long id) throws UnexpectedStateException { + if (this.snapshotId != id) { + throw new UnexpectedStateException("Current snapshot id is " + this.snapshotId + ",passed " + + id); + } + // OK. Passed in snapshot is same as current snapshot. If not-empty, + // create a new snapshot and let the old one go. + StoreSegment oldSnapshot = this.snapshot; + if (!this.snapshot.isEmpty()) { + this.snapshot = StoreSegmentFactory.instance().createImmutableSegment( + getComparator(), 0); + } + this.snapshotId = -1; + oldSnapshot.close(); + } + + /** + * Get the entire heap usage for this MemStore not including keys in the + * snapshot. + */ + @Override + public long heapSize() { + return getActive().getSize(); + } + + /** + * On flush, how much memory we will clear from the active cell set. + * + * @return size of data that is going to be flushed from active set + */ + @Override + public long getFlushableSize() { + long snapshotSize = getSnapshot().getSize(); + return snapshotSize > 0 ? snapshotSize : keySize(); + } + + + /** + * @return scanner on memstore and snapshot in this order. + */ + @Override + public List getScanners(long readPt) throws IOException { + return Collections. singletonList(new MemStoreScanner(this, readPt)); + } + + @Override + public long getSnapshotSize() { + return getSnapshot().getSize(); + } + + @Override + public String toString() { + String res = ""; + int i = 1; + try { + for (StoreSegment segment : getListOfSegments()) { + res += "Segment (" + i + ") " + segment.toString() + "; "; + i++; + } + } catch (IOException e){ + return e.toString(); + } + return res; + } + + protected void rollbackSnapshot(Cell cell) { + // If the key is in the snapshot, delete it. We should not update + // this.size, because that tracks the size of only the memstore and + // not the snapshot. The flush of this snapshot to disk has not + // yet started because Store.flush() waits for all rwcc transactions to + // commit before starting the flush to disk. + snapshot.rollback(cell); + } + + protected void rollbackActive(Cell cell) { + // If the key is in the memstore, delete it. Update this.size. 
+ long sz = active.rollback(cell); + if (sz != 0) { + setOldestEditTimeToNow(); + } + } + + protected Configuration getConfiguration() { + return conf; + } + + protected void dump(Log log) { + active.dump(log); + snapshot.dump(log); + } + + + /** + * Inserts the specified KeyValue into MemStore and deletes any existing + * versions of the same row/family/qualifier as the specified KeyValue. + *
+ * <p>
+ * First, the specified KeyValue is inserted into the Memstore. + *
+ * <p>
+ * If there are any existing KeyValues in this MemStore with the same row, + * family, and qualifier, they are removed. + *
+ * <p>
+ * Callers must hold the read lock. + * + * @param cell + * @param readpoint + * @return change in size of MemStore + */ + private long upsert(Cell cell, long readpoint) { + // Add the Cell to the MemStore + // Use the internalAdd method here since we (a) already have a lock + // and (b) cannot safely use the MSLAB here without potentially + // hitting OOME - see TestMemStore.testUpsertMSLAB for a + // test that triggers the pathological case if we don't avoid MSLAB + // here. + long addedSize = internalAdd(cell); + + // Get the Cells for the row/family/qualifier regardless of timestamp. + // For this case we want to clean up any other puts + Cell firstCell = KeyValueUtil.createFirstOnRow( + cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), + cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); + SortedSet ss = active.tailSet(firstCell); + Iterator it = ss.iterator(); + // versions visible to oldest scanner + int versionsVisible = 0; + while (it.hasNext()) { + Cell cur = it.next(); + + if (cell == cur) { + // ignore the one just put in + continue; + } + // check that this is the row and column we are interested in, otherwise bail + if (CellUtil.matchingRow(cell, cur) && CellUtil.matchingQualifier(cell, cur)) { + // only remove Puts that concurrent scanners cannot possibly see + if (cur.getTypeByte() == KeyValue.Type.Put.getCode() && + cur.getSequenceId() <= readpoint) { + if (versionsVisible >= 1) { + // if we get here we have seen at least one version visible to the oldest scanner, + // which means we can prove that no scanner will see this version + + // false means there was a change, so give us the size. + long delta = heapSizeChange(cur, true); + addedSize -= delta; + active.incSize(-delta); + it.remove(); + setOldestEditTimeToNow(); + } else { + versionsVisible++; + } + } + } else { + // past the row or column, done + break; + } + } + return addedSize; + } + + /* + * @param a + * @param b + * @return Return lowest of a or b or null if both a and b are null + */ + protected Cell getLowest(final Cell a, final Cell b) { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + return comparator.compareRows(a, b) <= 0? a: b; + } + + /* + * @param key Find row that follows this one. If null, return first. + * @param set Set to look in for a row beyond row. + * @return Next row or null if none found. If one found, will be a new + * KeyValue -- can be destroyed by subsequent calls to this method. + */ + protected Cell getNextRow(final Cell key, + final NavigableSet set) { + Cell result = null; + SortedSet tail = key == null? set: set.tailSet(key); + // Iterate until we fall into the next row; i.e. move off current row + for (Cell cell: tail) { + if (comparator.compareRows(cell, key) <= 0) + continue; + // Note: Not suppressing deletes or expired cells. Needs to be handled + // by higher up functions. + result = cell; + break; + } + return result; + } + + /** + * Given the specs of a column, update it, first by inserting a new record, + * then removing the old one. Since there is only 1 KeyValue involved, the memstoreTS + * will be set to 0, thus ensuring that they instantly appear to anyone. The underlying + * store will ensure that the insert/delete each are atomic. A scanner/reader will either + * get the new value, or the old value and all readers will eventually only see the new + * value after the old was removed. 
+ * + * @param row + * @param family + * @param qualifier + * @param newValue + * @param now + * @return Timestamp + */ + @VisibleForTesting + @Override + public long updateColumnValue(byte[] row, byte[] family, byte[] qualifier, + long newValue, long now) { + Cell firstCell = KeyValueUtil.createFirstOnRow(row, family, qualifier); + // Is there a Cell in 'snapshot' with the same TS? If so, upgrade the timestamp a bit. + Cell snc = snapshot.getFirstAfter(firstCell); + if(snc != null) { + // is there a matching Cell in the snapshot? + if (CellUtil.matchingRow(snc, firstCell) && CellUtil.matchingQualifier(snc, firstCell)) { + if (snc.getTimestamp() == now) { + now += 1; + } + } + } + // logic here: the new ts MUST be at least 'now'. But it could be larger if necessary. + // But the timestamp should also be max(now, mostRecentTsInMemstore) + + // so we cant add the new Cell w/o knowing what's there already, but we also + // want to take this chance to delete some cells. So two loops (sad) + + SortedSet ss = getActive().tailSet(firstCell); + for (Cell cell : ss) { + // if this isnt the row we are interested in, then bail: + if (!CellUtil.matchingColumn(cell, family, qualifier) + || !CellUtil.matchingRow(cell, firstCell)) { + break; // rows dont match, bail. + } + + // if the qualifier matches and it's a put, just RM it out of the active. + if (cell.getTypeByte() == KeyValue.Type.Put.getCode() && + cell.getTimestamp() > now && CellUtil.matchingQualifier(firstCell, cell)) { + now = cell.getTimestamp(); + } + } + + // create or update (upsert) a new Cell with + // 'now' and a 0 memstoreTS == immediately visible + List cells = new ArrayList(1); + cells.add(new KeyValue(row, family, qualifier, now, Bytes.toBytes(newValue))); + return upsert(cells, 1L); + } + + private Cell maybeCloneWithAllocator(Cell cell) { + return active.maybeCloneWithAllocator(cell); + } + + /** + * Internal version of add() that doesn't clone Cells with the + * allocator, and doesn't take the lock. 
+ * + * Callers should ensure they already have the read lock taken + */ + private long internalAdd(final Cell toAdd) { + long s = active.add(toAdd); + setOldestEditTimeToNow(); + return s; + } + + private void setOldestEditTimeToNow() { + if (timeOfOldestEdit == Long.MAX_VALUE) { + timeOfOldestEdit = EnvironmentEdgeManager.currentTime(); + } + } + + protected long keySize() { + return heapSize() - deepOverhead(); + } + + protected CellComparator getComparator() { + return comparator; + } + + protected MutableSegment getActive() { + return active; + } + + protected ImmutableSegment getSnapshot() { + return snapshot; + } + + protected AbstractMemStore setSnapshot(ImmutableSegment snapshot) { + this.snapshot = snapshot; + return this; + } + + protected void setSnapshotSize(long snapshotSize) { + getSnapshot().setSize(snapshotSize); + } + + /** + * Returns a list of Store segment scanners, one per each store segment + * @param readPt the version number required to initialize the scanners + * @return a list of Store segment scanners, one per each store segment + * @throws IOException + */ + protected abstract List getListOfScanners(long readPt) throws IOException; + + /** + * Returns an ordered list of segments from most recent to oldest in memstore + * @return an ordered list of segments from most recent to oldest in memstore + * @throws IOException + */ + protected abstract List getListOfSegments() throws IOException; + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java new file mode 100644 index 0000000..85b62ce --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java @@ -0,0 +1,185 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.NavigableSet; +import java.util.SortedSet; +import java.util.concurrent.ConcurrentNavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; + +/** + * A {@link java.util.Set} of {@link Cell}s implemented on top of a + * {@link java.util.concurrent.ConcurrentSkipListMap}. Works like a + * {@link java.util.concurrent.ConcurrentSkipListSet} in all but one regard: + * An add will overwrite if already an entry for the added key. 
In other words, + * where CSLS does "Adds the specified element to this set if it is not already + * present.", this implementation "Adds the specified element to this set EVEN + * if it is already present overwriting what was there previous". The call to + * add returns true if no value in the backing map or false if there was an + * entry with same key (though value may be different). + *

Otherwise, + * has same attributes as ConcurrentSkipListSet: e.g. tolerant of concurrent + * get and set and won't throw ConcurrentModificationException when iterating. + */ +@InterfaceAudience.Private +public class CellSet implements NavigableSet { + private final ConcurrentNavigableMap delegatee; + + CellSet(final CellComparator c) { + this.delegatee = new ConcurrentSkipListMap(c); + } + + CellSet(final ConcurrentNavigableMap m) { + this.delegatee = m; + } + + public Cell ceiling(Cell e) { + throw new UnsupportedOperationException("Not implemented"); + } + + public Iterator descendingIterator() { + return this.delegatee.descendingMap().values().iterator(); + } + + public NavigableSet descendingSet() { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell floor(Cell e) { + throw new UnsupportedOperationException("Not implemented"); + } + + public SortedSet headSet(final Cell toElement) { + return headSet(toElement, false); + } + + public NavigableSet headSet(final Cell toElement, + boolean inclusive) { + return new CellSet(this.delegatee.headMap(toElement, inclusive)); + } + + public Cell higher(Cell e) { + throw new UnsupportedOperationException("Not implemented"); + } + + public Iterator iterator() { + return this.delegatee.values().iterator(); + } + + public Cell lower(Cell e) { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell pollFirst() { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell pollLast() { + throw new UnsupportedOperationException("Not implemented"); + } + + public SortedSet subSet(Cell fromElement, Cell toElement) { + throw new UnsupportedOperationException("Not implemented"); + } + + public NavigableSet subSet(Cell fromElement, + boolean fromInclusive, Cell toElement, boolean toInclusive) { + throw new UnsupportedOperationException("Not implemented"); + } + + public SortedSet tailSet(Cell fromElement) { + return tailSet(fromElement, true); + } + + public NavigableSet tailSet(Cell fromElement, boolean inclusive) { + return new CellSet(this.delegatee.tailMap(fromElement, inclusive)); + } + + public Comparator comparator() { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell first() { + return this.delegatee.get(this.delegatee.firstKey()); + } + + public Cell last() { + return this.delegatee.get(this.delegatee.lastKey()); + } + + public boolean add(Cell e) { + return this.delegatee.put(e, e) == null; + } + + public boolean addAll(Collection c) { + throw new UnsupportedOperationException("Not implemented"); + } + + public void clear() { + this.delegatee.clear(); + } + + public boolean contains(Object o) { + //noinspection SuspiciousMethodCalls + return this.delegatee.containsKey(o); + } + + public boolean containsAll(Collection c) { + throw new UnsupportedOperationException("Not implemented"); + } + + public boolean isEmpty() { + return this.delegatee.isEmpty(); + } + + public boolean remove(Object o) { + return this.delegatee.remove(o) != null; + } + + public boolean removeAll(Collection c) { + throw new UnsupportedOperationException("Not implemented"); + } + + public boolean retainAll(Collection c) { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell get(Cell kv) { + return this.delegatee.get(kv); + } + + public int size() { + return this.delegatee.size(); + } + + public Object[] toArray() { + throw new UnsupportedOperationException("Not implemented"); + } + + public T[] toArray(T[] a) { + throw new 
UnsupportedOperationException("Not implemented"); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSkipListSet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSkipListSet.java deleted file mode 100644 index e9941b3..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSkipListSet.java +++ /dev/null @@ -1,185 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.regionserver; - -import java.util.Collection; -import java.util.Comparator; -import java.util.Iterator; -import java.util.NavigableSet; -import java.util.SortedSet; -import java.util.concurrent.ConcurrentNavigableMap; -import java.util.concurrent.ConcurrentSkipListMap; - -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.classification.InterfaceAudience; - -/** - * A {@link java.util.Set} of {@link Cell}s implemented on top of a - * {@link java.util.concurrent.ConcurrentSkipListMap}. Works like a - * {@link java.util.concurrent.ConcurrentSkipListSet} in all but one regard: - * An add will overwrite if already an entry for the added key. In other words, - * where CSLS does "Adds the specified element to this set if it is not already - * present.", this implementation "Adds the specified element to this set EVEN - * if it is already present overwriting what was there previous". The call to - * add returns true if no value in the backing map or false if there was an - * entry with same key (though value may be different). - *

Otherwise, - * has same attributes as ConcurrentSkipListSet: e.g. tolerant of concurrent - * get and set and won't throw ConcurrentModificationException when iterating. - */ -@InterfaceAudience.Private -public class CellSkipListSet implements NavigableSet { - private final ConcurrentNavigableMap delegatee; - - CellSkipListSet(final CellComparator c) { - this.delegatee = new ConcurrentSkipListMap(c); - } - - CellSkipListSet(final ConcurrentNavigableMap m) { - this.delegatee = m; - } - - public Cell ceiling(Cell e) { - throw new UnsupportedOperationException("Not implemented"); - } - - public Iterator descendingIterator() { - return this.delegatee.descendingMap().values().iterator(); - } - - public NavigableSet descendingSet() { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell floor(Cell e) { - throw new UnsupportedOperationException("Not implemented"); - } - - public SortedSet headSet(final Cell toElement) { - return headSet(toElement, false); - } - - public NavigableSet headSet(final Cell toElement, - boolean inclusive) { - return new CellSkipListSet(this.delegatee.headMap(toElement, inclusive)); - } - - public Cell higher(Cell e) { - throw new UnsupportedOperationException("Not implemented"); - } - - public Iterator iterator() { - return this.delegatee.values().iterator(); - } - - public Cell lower(Cell e) { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell pollFirst() { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell pollLast() { - throw new UnsupportedOperationException("Not implemented"); - } - - public SortedSet subSet(Cell fromElement, Cell toElement) { - throw new UnsupportedOperationException("Not implemented"); - } - - public NavigableSet subSet(Cell fromElement, - boolean fromInclusive, Cell toElement, boolean toInclusive) { - throw new UnsupportedOperationException("Not implemented"); - } - - public SortedSet tailSet(Cell fromElement) { - return tailSet(fromElement, true); - } - - public NavigableSet tailSet(Cell fromElement, boolean inclusive) { - return new CellSkipListSet(this.delegatee.tailMap(fromElement, inclusive)); - } - - public Comparator comparator() { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell first() { - return this.delegatee.get(this.delegatee.firstKey()); - } - - public Cell last() { - return this.delegatee.get(this.delegatee.lastKey()); - } - - public boolean add(Cell e) { - return this.delegatee.put(e, e) == null; - } - - public boolean addAll(Collection c) { - throw new UnsupportedOperationException("Not implemented"); - } - - public void clear() { - this.delegatee.clear(); - } - - public boolean contains(Object o) { - //noinspection SuspiciousMethodCalls - return this.delegatee.containsKey(o); - } - - public boolean containsAll(Collection c) { - throw new UnsupportedOperationException("Not implemented"); - } - - public boolean isEmpty() { - return this.delegatee.isEmpty(); - } - - public boolean remove(Object o) { - return this.delegatee.remove(o) != null; - } - - public boolean removeAll(Collection c) { - throw new UnsupportedOperationException("Not implemented"); - } - - public boolean retainAll(Collection c) { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell get(Cell kv) { - return this.delegatee.get(kv); - } - - public int size() { - return this.delegatee.size(); - } - - public Object[] toArray() { - throw new UnsupportedOperationException("Not implemented"); - } - - public T[] 
toArray(T[] a) { - throw new UnsupportedOperationException("Not implemented"); - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactedMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactedMemStore.java new file mode 100644 index 0000000..c8672eb --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactedMemStore.java @@ -0,0 +1,334 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.wal.WAL; + +import java.io.IOException; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.NavigableMap; +import java.util.TreeMap; + +/** + * A memstore implementation which supports in-memory compaction. + * A compaction pipeline is added between the active set and the snapshot data structures; + * it consists of a list of kv-sets that are subject to compaction. + * The semantics of the prepare-for-flush phase are changed: instead of shifting the current active + * set to snapshot, the active set is pushed into the pipeline. + * Like the snapshot, all pipeline components are read-only; updates only affect the active set. + * To ensure this property we take advantage of the existing blocking mechanism -- the active set + * is pushed to the pipeline while holding updatesLock in exclusive mode. + * Periodically, a compaction is applied in the background to all pipeline components resulting + * in a single read-only component. The “old” components are discarded when no scanner is reading + * them. 
+ */ +@InterfaceAudience.Private +public class CompactedMemStore extends AbstractMemStore { + + public final static long DEEP_OVERHEAD_PER_PIPELINE_ITEM = ClassSize.align( + ClassSize.TIMERANGE_TRACKER + + ClassSize.CELL_SKIPLIST_SET + ClassSize.CONCURRENT_SKIPLISTMAP); + private static final Log LOG = LogFactory.getLog(CompactedMemStore.class); + private HStore store; + private CompactionPipeline pipeline; + private MemStoreCompactor compactor; + private boolean forceFlushToDisk; + private NavigableMap timestampToWALSeqId; + + public CompactedMemStore(Configuration conf, CellComparator c, + HStore store) throws IOException { + super(conf, c); + this.store = store; + this.pipeline = new CompactionPipeline(store.getHRegion()); + this.compactor = new MemStoreCompactor(this, pipeline, c, conf); + this.forceFlushToDisk = false; + this.timestampToWALSeqId = new TreeMap<>(); + } + + public static long getStoreSegmentSize(StoreSegment segment) { + return segment.getSize() - DEEP_OVERHEAD_PER_PIPELINE_ITEM; + } + + public static long getStoreSegmentListSize(LinkedList list) { + long res = 0; + for (StoreSegment segment : list) { + res += getStoreSegmentSize(segment); + } + return res; + } + + @Override + protected List getListOfScanners(long readPt) throws IOException { + LinkedList pipelineList = pipeline.getStoreSegmentList(); + LinkedList list = new LinkedList(); + list.add(getActive().getScanner(readPt)); + for (StoreSegment item : pipelineList) { + list.add(item.getScanner(readPt)); + } + list.add(getSnapshot().getScanner(readPt)); + // set sequence ids by decsending order + Iterator iterator = list.descendingIterator(); + int seqId = 0; + while (iterator.hasNext()) { + iterator.next().setSequenceID(seqId); + seqId++; + } + return list; + } + + /** + * @return Total memory occupied by this MemStore. + * This is not thread safe and the memstore may be changed while computing its size. + * It is the responsibility of the caller to make sure this doesn't happen. + */ + @Override public long size() { + long res = 0; + for (StoreSegment item : getListOfSegments()) { + res += item.getSize(); + } + return res; + } + + /** + * Push the current active memstore bucket into the pipeline + * and create a snapshot of the tail of current compaction pipeline + * Snapshot must be cleared by call to {@link #clearSnapshot}. + * {@link #clearSnapshot(long)}. + * @param flushOpSeqId the sequence id that is attached to the flush operation in the wal + * + * @return {@link MemStoreSnapshot} + */ + @Override public MemStoreSnapshot snapshot(long flushOpSeqId) { + MutableSegment active = getActive(); + // If snapshot currently has entries, then flusher failed or didn't call + // cleanup. Log a warning. + if (!getSnapshot().isEmpty()) { + LOG.warn("Snapshot called again without clearing previous. " + + "Doing nothing. 
Another ongoing flush or did we fail last attempt?"); + } else { + LOG.info("FORCE FLUSH MODE: Pushing active set into compaction pipeline, " + + "and pipeline tail into snapshot."); + pushActiveToPipeline(active, flushOpSeqId, false); + this.snapshotId = EnvironmentEdgeManager.currentTime(); + pushTailToSnapshot(); + resetForceFlush(); + } + return new MemStoreSnapshot(this.snapshotId, getSnapshot()); + } + + @Override + public void flushInMemory(long flushOpSeqId) { + MutableSegment active = getActive(); + LOG.info("Pushing active set into compaction pipeline, and initiating compaction."); + pushActiveToPipeline(active, flushOpSeqId, true); + try { + // Speculative compaction execution, may be interrupted if flush is forced while + // compaction is in progress + compactor.startCompact(store); + } catch (IOException e) { + LOG.error("Unable to run memstore compaction", e); + } + + } + + @Override + public void updateLowestUnflushedSequenceIdInWal(boolean onlyIfGreater) { + long minTimestamp = pipeline.getMinTimestamp(); + Long seqId = getMaxSeqId(minTimestamp); + if (seqId == null) return; + byte[] encodedRegionName = getRegion().getRegionInfo().getEncodedNameAsBytes(); + byte[] familyName = getFamilyName(); + WAL wal = getRegion().getWAL(); + if (wal != null) { + wal.updateStore(encodedRegionName, familyName, seqId, onlyIfGreater); + } + } + + private void pushActiveToPipeline(MutableSegment active, long flushOpSeqId, + boolean needToUpdateRegionMemStoreSizeCounter) { + if (!active.isEmpty()) { + pipeline.pushHead(active); + active.setSize(active.getSize() - deepOverhead() + DEEP_OVERHEAD_PER_PIPELINE_ITEM); + long size = getStoreSegmentSize(active); + resetCellSet(); + updateRegionAdditionalMemstoreSizeCounter(size); //push size into pipeline + if (needToUpdateRegionMemStoreSizeCounter) { + updateRegionMemStoreSizeCounter(-size); + } + Long now = EnvironmentEdgeManager.currentTime(); + timestampToWALSeqId.put(now, flushOpSeqId); + } + } + + private void pushTailToSnapshot() { + ImmutableSegment tail = pipeline.pullTail(); + if (!tail.isEmpty()) { + setSnapshot(tail); + long size = getStoreSegmentSize(tail); + setSnapshotSize(size); + updateRegionAdditionalMemstoreSizeCounter(-size); //pull size out of pipeline + } + } + + private void updateRegionAdditionalMemstoreSizeCounter(long size) { + if (getRegion() != null) { + long globalMemstoreAdditionalSize = getRegion().addAndGetGlobalMemstoreAdditionalSize(size); + // no need to update global memstore size as it is updated by the flusher + LOG.debug(getRegion().getRegionInfo().getEncodedName() + " globalMemstoreAdditionalSize: " + + globalMemstoreAdditionalSize); + } + } + + private void updateRegionMemStoreSizeCounter(long size) { + if (getRegion() != null) { + // need to update global memstore size when it is not accounted by the flusher + long globalMemstoreSize = getRegion().addAndGetGlobalMemstoreSize(size); + LOG.debug(getRegion().getRegionInfo().getEncodedName() + " globalMemstoreSize: " + + globalMemstoreSize); + } + } + + /** + * Remove n key from the memstore. Only kvs that have the same key and the same memstoreTS are + * removed. It is ok to not update timeRangeTracker in this call. 
+ * + * @param cell + */ + @Override public void rollback(Cell cell) { + rollbackSnapshot(cell); + pipeline.rollback(cell); + rollbackActive(cell); + } + + @Override + public AbstractMemStore setForceFlushToDisk() { + forceFlushToDisk = true; + // stop compactor if currently working, to avoid possible conflict in pipeline + compactor.stopCompact(); + return this; + } + + @Override boolean isForceFlushToDisk() { + return forceFlushToDisk; + } + + @Override public boolean isCompactedMemStore() { + return true; + } + + @Override public boolean isMemStoreInCompaction() { + return compactor.isInCompaction(); + } + + @Override + public LinkedList getListOfSegments() { + LinkedList pipelineList = pipeline.getStoreSegmentList(); + LinkedList list = new LinkedList(); + list.add(getActive()); + list.addAll(pipelineList); + list.add(getSnapshot()); + return list; + } + + private CompactedMemStore resetForceFlush() { + forceFlushToDisk = false; + return this; + } + + //methods for tests + + /** + * @param cell Find the row that comes after this one. If null, we return the + * first. + * @return Next row or null if none found. + */ + Cell getNextRow(final Cell cell) { + Cell lowest = null; + LinkedList segments = getListOfSegments(); + for (StoreSegment segment : segments) { + if (lowest == null) { + lowest = getNextRow(cell, segment.getCellSet()); + } else { + lowest = getLowest(lowest, getNextRow(cell, segment.getCellSet())); + } + } + return lowest; + } + + void disableCompaction() { + compactor.toggleCompaction(false); + } + + void enableCompaction() { + compactor.toggleCompaction(true); + } + + public HRegion getRegion() { + return store.getHRegion(); + } + + public byte[] getFamilyName() { + return store.getFamily().getName(); + } + + /** + * Returns the (maximal) sequence id that is associated with the maximal ts that is smaller than + * the given ts, and removes all entries in the ts=>seqid map with timestamp smaller than + * the given ts. + * + * @param minTimestamp + * @return sequence id + */ + public Long getMaxSeqId(long minTimestamp) { + Long res = null; + Long last = null; + List tsToRemove = new LinkedList(); + // go through the timestamps by their order; stop when reaching the end or to a greater + // timestamp than the given one. Return the seq id that is associated with *last* ts (if not + // null) that is smaller than the given ts + for (Long ts : timestampToWALSeqId.keySet()) { + if (ts >= minTimestamp) { + break; + } + // else ts < min ts in memstore, therefore can use sequence id to truncate wal + if (last != null) { + tsToRemove.add(last); + } + last = ts; + } + if (last != null) { + tsToRemove.add(last); + res = timestampToWALSeqId.get(last); + } + for (Long ts : tsToRemove) { + timestampToWALSeqId.remove(ts); + } + return res; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionPipeline.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionPipeline.java new file mode 100644 index 0000000..aa1ca2b --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionPipeline.java @@ -0,0 +1,226 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + * The compaction pipeline of a {@link CompactedMemStore}, is a FIFO queue of cell set buckets. + * It supports pushing a cell set bucket at the head of the pipeline and pulling a bucket from the + * tail to flush to disk. + * It also supports swap operation to allow the compactor swap a subset of the buckets with a new + * (compacted) one. This swap succeeds only if the version number passed with the list of buckets + * to swap is the same as the current version of the pipeline. + * The pipeline version is updated whenever swapping buckets or pulling the bucket at the tail. + */ +@InterfaceAudience.Private +public class CompactionPipeline { + private static final Log LOG = LogFactory.getLog(CompactedMemStore.class); + + private final HRegion region; + private LinkedList pipeline; + private long version; + // a lock to protect critical sections changing the structure of the list + private final Lock lock; + + private static final ImmutableSegment EMPTY_MEM_STORE_SEGMENT = StoreSegmentFactory.instance() + .createImmutableSegment(null, + CompactedMemStore.DEEP_OVERHEAD_PER_PIPELINE_ITEM); + + public CompactionPipeline(HRegion region) { + this.region = region; + this.pipeline = new LinkedList(); + this.version = 0; + this.lock = new ReentrantLock(true); + } + + public boolean pushHead(MutableSegment segment) { + lock.lock(); + try { + ImmutableSegment immutableSegment = StoreSegmentFactory.instance(). + createImmutableSegment(region.getBaseConf(), segment); + return addFirst(immutableSegment); + } finally { + lock.unlock(); + } + } + + public ImmutableSegment pullTail() { + lock.lock(); + try { + if(pipeline.isEmpty()) { + return EMPTY_MEM_STORE_SEGMENT; + } + return removeLast(); + } finally { + lock.unlock(); + } + } + + public VersionedSegmentsList getVersionedList() { + lock.lock(); + try { + LinkedList segmentList = new LinkedList(pipeline); + VersionedSegmentsList res = new VersionedSegmentsList(segmentList, version); + return res; + } finally { + lock.unlock(); + } + } + + /** + * Swaps the versioned list at the tail of the pipeline with the new compacted segment. + * Swapping only if there were no changes to the suffix of the list while it was compacted. 
+ * @param versionedList tail of the pipeline that was compacted + * @param segment new compacted segment + * @return true iff swapped tail with new compacted segment + */ + public boolean swap(VersionedSegmentsList versionedList, ImmutableSegment segment) { + if(versionedList.getVersion() != version) { + return false; + } + lock.lock(); + try { + if(versionedList.getVersion() != version) { + return false; + } + LinkedList suffix = versionedList.getStoreSegments(); + boolean valid = validateSuffixList(suffix); + if(!valid) return false; + LOG.info("Swapping pipeline suffix with compacted item."); + swapSuffix(suffix,segment); + if(region != null) { + // update the global memstore size counter + long suffixSize = CompactedMemStore.getStoreSegmentListSize(suffix); + long newSize = CompactedMemStore.getStoreSegmentSize(segment); + long delta = suffixSize - newSize; + long globalMemstoreAdditionalSize = region.addAndGetGlobalMemstoreAdditionalSize(-delta); + LOG.info("Suffix size: "+ suffixSize+" compacted item size: "+newSize+ + " globalMemstoreAdditionalSize: "+globalMemstoreAdditionalSize); + } + return true; + } finally { + lock.unlock(); + } + } + + public long rollback(Cell cell) { + lock.lock(); + long sz = 0; + try { + if(!pipeline.isEmpty()) { + Iterator pipelineBackwardIterator = pipeline.descendingIterator(); + StoreSegment current = pipelineBackwardIterator.next(); + for (; pipelineBackwardIterator.hasNext(); current = pipelineBackwardIterator.next()) { + sz += current.rollback(cell); + } + if(sz != 0) { + incVersion(); + } + } + return sz; + } finally { + lock.unlock(); + } + } + + public boolean isEmpty() { + return pipeline.isEmpty(); + } + + public LinkedList getStoreSegmentList() { + lock.lock(); + try { + LinkedList res = new LinkedList(pipeline); + return res; + } finally { + lock.unlock(); + } + + } + + public long size() { + return pipeline.size(); + } + + private boolean validateSuffixList(LinkedList suffix) { + if(suffix.isEmpty()) { + // empty suffix is always valid + return true; + } + + Iterator pipelineBackwardIterator = pipeline.descendingIterator(); + Iterator suffixBackwardIterator = suffix.descendingIterator(); + ImmutableSegment suffixCurrent; + ImmutableSegment pipelineCurrent; + for( ; suffixBackwardIterator.hasNext(); ) { + if(!pipelineBackwardIterator.hasNext()) { + // a suffix longer than pipeline is invalid + return false; + } + suffixCurrent = suffixBackwardIterator.next(); + pipelineCurrent = pipelineBackwardIterator.next(); + if(suffixCurrent != pipelineCurrent) { + // non-matching suffix + return false; + } + } + // suffix matches pipeline suffix + return true; + } + + private void swapSuffix(LinkedList suffix, ImmutableSegment segment) { + version++; + for(StoreSegment itemInSuffix : suffix) { + itemInSuffix.close(); + } + pipeline.removeAll(suffix); + pipeline.addLast(segment); + } + + private ImmutableSegment removeLast() { + version++; + return pipeline.removeLast(); + } + + private boolean addFirst(ImmutableSegment segment) { + pipeline.add(0,segment); + return true; + } + + private void incVersion() { + version++; + } + + public long getMinTimestamp() { + long minTimestamp = Long.MIN_VALUE; + if(!isEmpty()) { + minTimestamp = pipeline.getLast().getMinTimestamp(); + } + return minTimestamp; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java index cc8c3a8..db6ef20 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java @@ -19,35 +19,23 @@ package org.apache.hadoop.hbase.regionserver; -import java.lang.management.ManagementFactory; -import java.lang.management.RuntimeMXBean; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableSet; -import java.util.SortedSet; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.util.ByteRange; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.ClassSize; -import org.apache.hadoop.hbase.util.CollectionBackedScanner; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; -import org.apache.hadoop.hbase.util.ReflectionUtils; -import org.apache.htrace.Trace; + +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.util.ArrayList; +import java.util.List; /** * The MemStore holds in-memory modifications to the Store. Modifications @@ -66,40 +54,8 @@ import org.apache.htrace.Trace; * in KV size. */ @InterfaceAudience.Private -public class DefaultMemStore implements MemStore { +public class DefaultMemStore extends AbstractMemStore { private static final Log LOG = LogFactory.getLog(DefaultMemStore.class); - static final String USEMSLAB_KEY = "hbase.hregion.memstore.mslab.enabled"; - private static final boolean USEMSLAB_DEFAULT = true; - static final String MSLAB_CLASS_NAME = "hbase.regionserver.mslab.class"; - - private Configuration conf; - - // MemStore. Use a CellSkipListSet rather than SkipListSet because of the - // better semantics. The Map will overwrite if passed a key it already had - // whereas the Set will not add new Cell if key is same though value might be - // different. Value is not important -- just make sure always same - // reference passed. - volatile CellSkipListSet cellSet; - - // Snapshot of memstore. Made for flusher. - volatile CellSkipListSet snapshot; - - final CellComparator comparator; - - // Used to track own heapSize - final AtomicLong size; - private volatile long snapshotSize; - - // Used to track when to flush - volatile long timeOfOldestEdit = Long.MAX_VALUE; - - TimeRangeTracker timeRangeTracker; - TimeRangeTracker snapshotTimeRangeTracker; - - volatile MemStoreLAB allocator; - volatile MemStoreLAB snapshotAllocator; - volatile long snapshotId; - volatile boolean tagsPresent; /** * Default constructor. Used for tests. @@ -112,184 +68,53 @@ public class DefaultMemStore implements MemStore { * Constructor. 
* @param c Comparator */ - public DefaultMemStore(final Configuration conf, - final CellComparator c) { - this.conf = conf; - this.comparator = c; - this.cellSet = new CellSkipListSet(c); - this.snapshot = new CellSkipListSet(c); - timeRangeTracker = new TimeRangeTracker(); - snapshotTimeRangeTracker = new TimeRangeTracker(); - this.size = new AtomicLong(DEEP_OVERHEAD); - this.snapshotSize = 0; - if (conf.getBoolean(USEMSLAB_KEY, USEMSLAB_DEFAULT)) { - String className = conf.get(MSLAB_CLASS_NAME, HeapMemStoreLAB.class.getName()); - this.allocator = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class }, new Object[] { conf }); - } else { - this.allocator = null; - } + public DefaultMemStore(final Configuration conf, final CellComparator c) { + super(conf, c); } void dump() { - for (Cell cell: this.cellSet) { - LOG.info(cell); - } - for (Cell cell: this.snapshot) { - LOG.info(cell); - } + super.dump(LOG); } /** * Creates a snapshot of the current memstore. * Snapshot must be cleared by call to {@link #clearSnapshot(long)} + * @param flushOpSeqId the sequence id that is attached to the flush operation in the wal */ @Override - public MemStoreSnapshot snapshot() { + public MemStoreSnapshot snapshot(long flushOpSeqId) { // If snapshot currently has entries, then flusher failed or didn't call // cleanup. Log a warning. - if (!this.snapshot.isEmpty()) { + if (!getSnapshot().isEmpty()) { LOG.warn("Snapshot called again without clearing previous. " + "Doing nothing. Another ongoing flush or did we fail last attempt?"); } else { this.snapshotId = EnvironmentEdgeManager.currentTime(); - this.snapshotSize = keySize(); - if (!this.cellSet.isEmpty()) { - this.snapshot = this.cellSet; - this.cellSet = new CellSkipListSet(this.comparator); - this.snapshotTimeRangeTracker = this.timeRangeTracker; - this.timeRangeTracker = new TimeRangeTracker(); - // Reset heap to not include any keys - this.size.set(DEEP_OVERHEAD); - this.snapshotAllocator = this.allocator; - // Reset allocator so we get a fresh buffer for the new memstore - if (allocator != null) { - String className = conf.get(MSLAB_CLASS_NAME, HeapMemStoreLAB.class.getName()); - this.allocator = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class }, new Object[] { conf }); - } - timeOfOldestEdit = Long.MAX_VALUE; + if (!getActive().isEmpty()) { + ImmutableSegment immutableSegment = StoreSegmentFactory.instance().createImmutableSegment + (getConfiguration(), getActive()); + setSnapshot(immutableSegment); + setSnapshotSize(keySize()); + resetCellSet(); } } - MemStoreSnapshot memStoreSnapshot = new MemStoreSnapshot(this.snapshotId, snapshot.size(), this.snapshotSize, - this.snapshotTimeRangeTracker, new CollectionBackedScanner(snapshot, this.comparator), - this.tagsPresent); - this.tagsPresent = false; - return memStoreSnapshot; - } - - /** - * The passed snapshot was successfully persisted; it can be let go. - * @param id Id of the snapshot to clean out. - * @throws UnexpectedStateException - * @see #snapshot() - */ - @Override - public void clearSnapshot(long id) throws UnexpectedStateException { - MemStoreLAB tmpAllocator = null; - if (this.snapshotId != id) { - throw new UnexpectedStateException("Current snapshot id is " + this.snapshotId + ",passed " - + id); - } - // OK. Passed in snapshot is same as current snapshot. If not-empty, - // create a new snapshot and let the old one go. 
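// Illustrative sketch only, not HBase code: a minimal standalone model of the
// snapshot()/clearSnapshot(id) handshake used above. The active set is swapped
// into an immutable snapshot under an id; clearSnapshot must present the same id
// before the snapshot memory is released. All names below are hypothetical and
// the real code tracks sizes, time ranges and MSLAB allocators as well.
import java.util.concurrent.ConcurrentSkipListSet;

class SnapshotCycleSketch {
  private ConcurrentSkipListSet<String> active = new ConcurrentSkipListSet<>();
  private ConcurrentSkipListSet<String> snapshot = new ConcurrentSkipListSet<>();
  private long snapshotId = -1;

  long snapshot(long id) {
    if (!snapshot.isEmpty()) {
      return snapshotId;                        // previous flush never called clearSnapshot
    }
    snapshotId = id;                            // remember which snapshot is outstanding
    snapshot = active;                          // active set becomes the immutable snapshot
    active = new ConcurrentSkipListSet<>();     // fresh, empty active set for new writes
    return snapshotId;
  }

  void clearSnapshot(long id) {
    if (id != snapshotId) {
      throw new IllegalStateException("Current snapshot id is " + snapshotId + ", passed " + id);
    }
    snapshot = new ConcurrentSkipListSet<>();   // snapshot was persisted; let it go
    snapshotId = -1;
  }
}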
- if (!this.snapshot.isEmpty()) { - this.snapshot = new CellSkipListSet(this.comparator); - this.snapshotTimeRangeTracker = new TimeRangeTracker(); - } - this.snapshotSize = 0; - this.snapshotId = -1; - if (this.snapshotAllocator != null) { - tmpAllocator = this.snapshotAllocator; - this.snapshotAllocator = null; - } - if (tmpAllocator != null) { - tmpAllocator.close(); - } - } - - @Override - public long getFlushableSize() { - return this.snapshotSize > 0 ? this.snapshotSize : keySize(); - } - - @Override - public long getSnapshotSize() { - return this.snapshotSize; - } + return new MemStoreSnapshot(this.snapshotId, getSnapshot()); - /** - * Write an update - * @param cell - * @return approximate size of the passed KV & newly added KV which maybe different than the - * passed-in KV - */ - @Override - public Pair add(Cell cell) { - Cell toAdd = maybeCloneWithAllocator(cell); - return new Pair(internalAdd(toAdd), toAdd); } @Override - public long timeOfOldestEdit() { - return timeOfOldestEdit; + protected List getListOfScanners(long readPt) throws IOException { + List list = new ArrayList(2); + list.add(0, getActive().getScanner(readPt)); + list.add(1, getSnapshot().getScanner(readPt)); + return list; } - private boolean addToCellSet(Cell e) { - boolean b = this.cellSet.add(e); - // In no tags case this NoTagsKeyValue.getTagsLength() is a cheap call. - // When we use ACL CP or Visibility CP which deals with Tags during - // mutation, the TagRewriteCell.getTagsLength() is a cheaper call. We do not - // parse the byte[] to identify the tags length. - if(e.getTagsLength() > 0) { - tagsPresent = true; - } - setOldestEditTimeToNow(); - return b; - } - - private boolean removeFromCellSet(Cell e) { - boolean b = this.cellSet.remove(e); - setOldestEditTimeToNow(); - return b; - } - - void setOldestEditTimeToNow() { - if (timeOfOldestEdit == Long.MAX_VALUE) { - timeOfOldestEdit = EnvironmentEdgeManager.currentTime(); - } - } - - /** - * Internal version of add() that doesn't clone Cells with the - * allocator, and doesn't take the lock. - * - * Callers should ensure they already have the read lock taken - */ - private long internalAdd(final Cell toAdd) { - long s = heapSizeChange(toAdd, addToCellSet(toAdd)); - timeRangeTracker.includeTimestamp(toAdd); - this.size.addAndGet(s); - return s; - } - - private Cell maybeCloneWithAllocator(Cell cell) { - if (allocator == null) { - return cell; - } - - int len = KeyValueUtil.length(cell); - ByteRange alloc = allocator.allocateBytes(len); - if (alloc == null) { - // The allocation was too large, allocator decided - // not to do anything with it. - return cell; - } - assert alloc.getBytes() != null; - KeyValueUtil.appendToByteArray(cell, alloc.getBytes(), alloc.getOffset()); - KeyValue newKv = new KeyValue(alloc.getBytes(), alloc.getOffset(), len); - newKv.setSequenceId(cell.getSequenceId()); - return newKv; + @Override protected List getListOfSegments() throws IOException { + List list = new ArrayList(2); + list.add(0, getActive()); + list.add(1, getSnapshot()); + return list; } /** @@ -302,40 +127,11 @@ public class DefaultMemStore implements MemStore { */ @Override public void rollback(Cell cell) { - // If the key is in the snapshot, delete it. We should not update - // this.size, because that tracks the size of only the memstore and - // not the snapshot. The flush of this snapshot to disk has not - // yet started because Store.flush() waits for all rwcc transactions to - // commit before starting the flush to disk. 
- Cell found = this.snapshot.get(cell); - if (found != null && found.getSequenceId() == cell.getSequenceId()) { - this.snapshot.remove(cell); - long sz = heapSizeChange(cell, true); - this.snapshotSize -= sz; - } - // If the key is in the memstore, delete it. Update this.size. - found = this.cellSet.get(cell); - if (found != null && found.getSequenceId() == cell.getSequenceId()) { - removeFromCellSet(cell); - long s = heapSizeChange(cell, true); - this.size.addAndGet(-s); - } + rollbackSnapshot(cell); + rollbackActive(cell); } - /** - * Write a delete - * @param deleteCell - * @return approximate size of the passed key and value. - */ - @Override - public long delete(Cell deleteCell) { - long s = 0; - Cell toAdd = maybeCloneWithAllocator(deleteCell); - s += heapSizeChange(toAdd, addToCellSet(toAdd)); - timeRangeTracker.includeTimestamp(toAdd); - this.size.addAndGet(s); - return s; - } + //methods for tests /** * @param cell Find the row that comes after this one. If null, we return the @@ -343,594 +139,40 @@ public class DefaultMemStore implements MemStore { * @return Next row or null if none found. */ Cell getNextRow(final Cell cell) { - return getLowest(getNextRow(cell, this.cellSet), getNextRow(cell, this.snapshot)); - } - - /* - * @param a - * @param b - * @return Return lowest of a or b or null if both a and b are null - */ - private Cell getLowest(final Cell a, final Cell b) { - if (a == null) { - return b; - } - if (b == null) { - return a; - } - return comparator.compareRows(a, b) <= 0? a: b; - } - - /* - * @param key Find row that follows this one. If null, return first. - * @param map Set to look in for a row beyond row. - * @return Next row or null if none found. If one found, will be a new - * KeyValue -- can be destroyed by subsequent calls to this method. - */ - private Cell getNextRow(final Cell key, - final NavigableSet set) { - Cell result = null; - SortedSet tail = key == null? set: set.tailSet(key); - // Iterate until we fall into the next row; i.e. move off current row - for (Cell cell: tail) { - if (comparator.compareRows(cell, key) <= 0) - continue; - // Note: Not suppressing deletes or expired cells. Needs to be handled - // by higher up functions. - result = cell; - break; - } - return result; + return getLowest( + getNextRow(cell, getActive().getCellSet()), + getNextRow(cell, getSnapshot().getCellSet())); } - /** - * Only used by tests. TODO: Remove - * - * Given the specs of a column, update it, first by inserting a new record, - * then removing the old one. Since there is only 1 KeyValue involved, the memstoreTS - * will be set to 0, thus ensuring that they instantly appear to anyone. The underlying - * store will ensure that the insert/delete each are atomic. A scanner/reader will either - * get the new value, or the old value and all readers will eventually only see the new - * value after the old was removed. - * - * @param row - * @param family - * @param qualifier - * @param newValue - * @param now - * @return Timestamp - */ - @Override - public long updateColumnValue(byte[] row, - byte[] family, - byte[] qualifier, - long newValue, - long now) { - Cell firstCell = KeyValueUtil.createFirstOnRow(row, family, qualifier); - // Is there a Cell in 'snapshot' with the same TS? If so, upgrade the timestamp a bit. - SortedSet snSs = snapshot.tailSet(firstCell); - if (!snSs.isEmpty()) { - Cell snc = snSs.first(); - // is there a matching Cell in the snapshot? 
- if (CellUtil.matchingRow(snc, firstCell) && CellUtil.matchingQualifier(snc, firstCell)) { - if (snc.getTimestamp() == now) { - // poop, - now += 1; - } - } - } - - // logic here: the new ts MUST be at least 'now'. But it could be larger if necessary. - // But the timestamp should also be max(now, mostRecentTsInMemstore) - - // so we cant add the new Cell w/o knowing what's there already, but we also - // want to take this chance to delete some cells. So two loops (sad) - - SortedSet ss = cellSet.tailSet(firstCell); - for (Cell cell : ss) { - // if this isnt the row we are interested in, then bail: - if (!CellUtil.matchingColumn(cell, family, qualifier) - || !CellUtil.matchingRow(cell, firstCell)) { - break; // rows dont match, bail. - } - - // if the qualifier matches and it's a put, just RM it out of the cellSet. - if (cell.getTypeByte() == KeyValue.Type.Put.getCode() && - cell.getTimestamp() > now && CellUtil.matchingQualifier(firstCell, cell)) { - now = cell.getTimestamp(); - } - } - - // create or update (upsert) a new Cell with - // 'now' and a 0 memstoreTS == immediately visible - List cells = new ArrayList(1); - cells.add(new KeyValue(row, family, qualifier, now, Bytes.toBytes(newValue))); - return upsert(cells, 1L); - } - - /** - * Update or insert the specified KeyValues. - *
<p>
- * For each KeyValue, insert into MemStore. This will atomically upsert the - * value for that row/family/qualifier. If a KeyValue did already exist, - * it will then be removed. - *
<p>
- * Currently the memstoreTS is kept at 0 so as each insert happens, it will - * be immediately visible. May want to change this so it is atomic across - * all KeyValues. - *
<p>
- * This is called under row lock, so Get operations will still see updates - * atomically. Scans will only see each KeyValue update as atomic. - * - * @param cells - * @param readpoint readpoint below which we can safely remove duplicate KVs - * @return change in memstore size - */ - @Override - public long upsert(Iterable cells, long readpoint) { - long size = 0; - for (Cell cell : cells) { - size += upsert(cell, readpoint); - } - return size; - } - - /** - * Inserts the specified KeyValue into MemStore and deletes any existing - * versions of the same row/family/qualifier as the specified KeyValue. - *
<p>
- * First, the specified KeyValue is inserted into the Memstore. - *
<p>
- * If there are any existing KeyValues in this MemStore with the same row, - * family, and qualifier, they are removed. - *
<p>
- * Callers must hold the read lock. - * - * @param cell - * @return change in size of MemStore - */ - private long upsert(Cell cell, long readpoint) { - // Add the Cell to the MemStore - // Use the internalAdd method here since we (a) already have a lock - // and (b) cannot safely use the MSLAB here without potentially - // hitting OOME - see TestMemStore.testUpsertMSLAB for a - // test that triggers the pathological case if we don't avoid MSLAB - // here. - long addedSize = internalAdd(cell); - - // Get the Cells for the row/family/qualifier regardless of timestamp. - // For this case we want to clean up any other puts - Cell firstCell = KeyValueUtil.createFirstOnRow( - cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), - cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), - cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); - SortedSet ss = cellSet.tailSet(firstCell); - Iterator it = ss.iterator(); - // versions visible to oldest scanner - int versionsVisible = 0; - while ( it.hasNext() ) { - Cell cur = it.next(); - - if (cell == cur) { - // ignore the one just put in - continue; - } - // check that this is the row and column we are interested in, otherwise bail - if (CellUtil.matchingRow(cell, cur) && CellUtil.matchingQualifier(cell, cur)) { - // only remove Puts that concurrent scanners cannot possibly see - if (cur.getTypeByte() == KeyValue.Type.Put.getCode() && - cur.getSequenceId() <= readpoint) { - if (versionsVisible >= 1) { - // if we get here we have seen at least one version visible to the oldest scanner, - // which means we can prove that no scanner will see this version - - // false means there was a change, so give us the size. - long delta = heapSizeChange(cur, true); - addedSize -= delta; - this.size.addAndGet(-delta); - it.remove(); - setOldestEditTimeToNow(); - } else { - versionsVisible++; - } - } - } else { - // past the row or column, done - break; - } - } - return addedSize; + @Override public AbstractMemStore setForceFlushToDisk() { + // do nothing + return this; } - /** - * @return scanner on memstore and snapshot in this order. - */ - @Override - public List getScanners(long readPt) { - return Collections. singletonList(new MemStoreScanner(readPt)); + @Override boolean isForceFlushToDisk() { + return true; } - /** - * Check if this memstore may contain the required keys - * @param scan - * @return False if the key definitely does not exist in this Memstore - */ - public boolean shouldSeek(Scan scan, long oldestUnexpiredTS) { - return (timeRangeTracker.includesTimeRange(scan.getTimeRange()) || - snapshotTimeRangeTracker.includesTimeRange(scan.getTimeRange())) - && (Math.max(timeRangeTracker.getMaximumTimestamp(), - snapshotTimeRangeTracker.getMaximumTimestamp()) >= - oldestUnexpiredTS); + @Override public boolean isCompactedMemStore() { + return false; } - /* - * MemStoreScanner implements the KeyValueScanner. - * It lets the caller scan the contents of a memstore -- both current - * map and snapshot. - * This behaves as if it were a real scanner but does not maintain position. - */ - protected class MemStoreScanner extends NonLazyKeyValueScanner { - // Next row information for either cellSet or snapshot - private Cell cellSetNextRow = null; - private Cell snapshotNextRow = null; - - // last iterated Cells for cellSet and snapshot (to restore iterator state after reseek) - private Cell cellSetItRow = null; - private Cell snapshotItRow = null; - - // iterator based scanning. 
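// Illustrative sketch only (hypothetical names, not HBase code): the upsert rule
// described in the javadoc above, modeled on a plain sorted map holding the
// versions of a single row/family/qualifier. After the new version is inserted,
// older versions are dropped, but only those every concurrent scanner can already
// see (seqId <= readpoint), and only once one visible version has been kept.
import java.util.Collections;
import java.util.Iterator;
import java.util.NavigableMap;
import java.util.TreeMap;

class UpsertSketch {
  // key = sequence id (iterated newest first), value = cell payload for one column
  private final NavigableMap<Long, String> versions = new TreeMap<>(Collections.reverseOrder());

  void upsert(long seqId, String value, long readpoint) {
    versions.put(seqId, value);                 // 1. insert the new version
    int versionsVisible = 0;
    Iterator<Long> it = versions.keySet().iterator();
    while (it.hasNext()) {
      long cur = it.next();
      if (cur == seqId) {
        continue;                               // skip the version just inserted
      }
      if (cur <= readpoint) {                   // visible to even the oldest scanner
        if (versionsVisible >= 1) {
          it.remove();                          // provably redundant, safe to drop
        } else {
          versionsVisible++;                    // keep one version for the oldest scanner
        }
      }
    }
  }
}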
- private Iterator cellSetIt; - private Iterator snapshotIt; - - // The cellSet and snapshot at the time of creating this scanner - private CellSkipListSet cellSetAtCreation; - private CellSkipListSet snapshotAtCreation; - - // the pre-calculated Cell to be returned by peek() or next() - private Cell theNext; - - // The allocator and snapshot allocator at the time of creating this scanner - volatile MemStoreLAB allocatorAtCreation; - volatile MemStoreLAB snapshotAllocatorAtCreation; - - // A flag represents whether could stop skipping Cells for MVCC - // if have encountered the next row. Only used for reversed scan - private boolean stopSkippingCellsIfNextRow = false; - - private long readPoint; - - /* - Some notes... - - So memstorescanner is fixed at creation time. this includes pointers/iterators into - existing kvset/snapshot. during a snapshot creation, the kvset is null, and the - snapshot is moved. since kvset is null there is no point on reseeking on both, - we can save us the trouble. During the snapshot->hfile transition, the memstore - scanner is re-created by StoreScanner#updateReaders(). StoreScanner should - potentially do something smarter by adjusting the existing memstore scanner. - - But there is a greater problem here, that being once a scanner has progressed - during a snapshot scenario, we currently iterate past the kvset then 'finish' up. - if a scan lasts a little while, there is a chance for new entries in kvset to - become available but we will never see them. This needs to be handled at the - StoreScanner level with coordination with MemStoreScanner. - - Currently, this problem is only partly managed: during the small amount of time - when the StoreScanner has not yet created a new MemStoreScanner, we will miss - the adds to kvset in the MemStoreScanner. - */ - - MemStoreScanner(long readPoint) { - super(); - - this.readPoint = readPoint; - cellSetAtCreation = cellSet; - snapshotAtCreation = snapshot; - if (allocator != null) { - this.allocatorAtCreation = allocator; - this.allocatorAtCreation.incScannerCount(); - } - if (snapshotAllocator != null) { - this.snapshotAllocatorAtCreation = snapshotAllocator; - this.snapshotAllocatorAtCreation.incScannerCount(); - } - if (Trace.isTracing() && Trace.currentSpan() != null) { - Trace.currentSpan().addTimelineAnnotation("Creating MemStoreScanner"); - } - } - - /** - * Lock on 'this' must be held by caller. - * @param it - * @return Next Cell - */ - private Cell getNext(Iterator it) { - Cell startCell = theNext; - Cell v = null; - try { - while (it.hasNext()) { - v = it.next(); - if (v.getSequenceId() <= this.readPoint) { - return v; - } - if (stopSkippingCellsIfNextRow && startCell != null - && comparator.compareRows(v, startCell) > 0) { - return null; - } - } - - return null; - } finally { - if (v != null) { - // in all cases, remember the last Cell iterated to - if (it == snapshotIt) { - snapshotItRow = v; - } else { - cellSetItRow = v; - } - } - } - } - - /** - * Set the scanner at the seek key. - * Must be called only once: there is no thread safety between the scanner - * and the memStore. - * @param key seek value - * @return false if the key is null or if there is no data - */ - @Override - public synchronized boolean seek(Cell key) { - if (key == null) { - close(); - return false; - } - // kvset and snapshot will never be null. - // if tailSet can't find anything, SortedSet is empty (not null). 
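// Illustrative sketch only, not HBase code: the two-source merge that
// MemStoreScanner performs over the cell set and the snapshot. Each source is a
// sorted iterator; peek() exposes the lowest of the two current heads and next()
// advances only the source that supplied the returned element.
import java.util.Comparator;
import java.util.Iterator;

class TwoSourceMergeSketch<T> {
  private final Iterator<T> a, b;
  private final Comparator<T> cmp;
  private T headA, headB;

  TwoSourceMergeSketch(Iterator<T> a, Iterator<T> b, Comparator<T> cmp) {
    this.a = a;
    this.b = b;
    this.cmp = cmp;
    headA = a.hasNext() ? a.next() : null;
    headB = b.hasNext() ? b.next() : null;
  }

  T peek() {                                    // lowest of the two heads, or null when drained
    if (headA == null) return headB;
    if (headB == null) return headA;
    return cmp.compare(headA, headB) <= 0 ? headA : headB;
  }

  T next() {                                    // return the lowest head, advance its source only
    T ret = peek();
    if (ret == null) return null;
    if (ret == headA) {
      headA = a.hasNext() ? a.next() : null;
    } else {
      headB = b.hasNext() ? b.next() : null;
    }
    return ret;
  }
}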
- cellSetIt = cellSetAtCreation.tailSet(key).iterator(); - snapshotIt = snapshotAtCreation.tailSet(key).iterator(); - cellSetItRow = null; - snapshotItRow = null; - - return seekInSubLists(key); - } - - - /** - * (Re)initialize the iterators after a seek or a reseek. - */ - private synchronized boolean seekInSubLists(Cell key){ - cellSetNextRow = getNext(cellSetIt); - snapshotNextRow = getNext(snapshotIt); - - // Calculate the next value - theNext = getLowest(cellSetNextRow, snapshotNextRow); - - // has data - return (theNext != null); - } - - - /** - * Move forward on the sub-lists set previously by seek. - * @param key seek value (should be non-null) - * @return true if there is at least one KV to read, false otherwise - */ - @Override - public synchronized boolean reseek(Cell key) { - /* - See HBASE-4195 & HBASE-3855 & HBASE-6591 for the background on this implementation. - This code is executed concurrently with flush and puts, without locks. - Two points must be known when working on this code: - 1) It's not possible to use the 'kvTail' and 'snapshot' - variables, as they are modified during a flush. - 2) The ideal implementation for performance would use the sub skip list - implicitly pointed by the iterators 'kvsetIt' and - 'snapshotIt'. Unfortunately the Java API does not offer a method to - get it. So we remember the last keys we iterated to and restore - the reseeked set to at least that point. - */ - cellSetIt = cellSetAtCreation.tailSet(getHighest(key, cellSetItRow)).iterator(); - snapshotIt = snapshotAtCreation.tailSet(getHighest(key, snapshotItRow)).iterator(); - - return seekInSubLists(key); - } - - - @Override - public synchronized Cell peek() { - //DebugPrint.println(" MS@" + hashCode() + " peek = " + getLowest()); - return theNext; - } - - @Override - public synchronized Cell next() { - if (theNext == null) { - return null; - } - - final Cell ret = theNext; - - // Advance one of the iterators - if (theNext == cellSetNextRow) { - cellSetNextRow = getNext(cellSetIt); - } else { - snapshotNextRow = getNext(snapshotIt); - } - - // Calculate the next value - theNext = getLowest(cellSetNextRow, snapshotNextRow); - - //long readpoint = ReadWriteConsistencyControl.getThreadReadPoint(); - //DebugPrint.println(" MS@" + hashCode() + " next: " + theNext + " next_next: " + - // getLowest() + " threadpoint=" + readpoint); - return ret; - } - - /* - * Returns the lower of the two key values, or null if they are both null. - * This uses comparator.compare() to compare the KeyValue using the memstore - * comparator. - */ - private Cell getLowest(Cell first, Cell second) { - if (first == null && second == null) { - return null; - } - if (first != null && second != null) { - int compare = comparator.compare(first, second); - return (compare <= 0 ? first : second); - } - return (first != null ? first : second); - } - - /* - * Returns the higher of the two cells, or null if they are both null. - * This uses comparator.compare() to compare the Cell using the memstore - * comparator. - */ - private Cell getHighest(Cell first, Cell second) { - if (first == null && second == null) { - return null; - } - if (first != null && second != null) { - int compare = comparator.compare(first, second); - return (compare > 0 ? first : second); - } - return (first != null ? 
first : second); - } - - public synchronized void close() { - this.cellSetNextRow = null; - this.snapshotNextRow = null; - - this.cellSetIt = null; - this.snapshotIt = null; - - if (allocatorAtCreation != null) { - this.allocatorAtCreation.decScannerCount(); - this.allocatorAtCreation = null; - } - if (snapshotAllocatorAtCreation != null) { - this.snapshotAllocatorAtCreation.decScannerCount(); - this.snapshotAllocatorAtCreation = null; - } - - this.cellSetItRow = null; - this.snapshotItRow = null; - } - - /** - * MemStoreScanner returns max value as sequence id because it will - * always have the latest data among all files. - */ - @Override - public long getSequenceID() { - return Long.MAX_VALUE; - } - - @Override - public boolean shouldUseScanner(Scan scan, SortedSet columns, - long oldestUnexpiredTS) { - return shouldSeek(scan, oldestUnexpiredTS); - } - - /** - * Seek scanner to the given key first. If it returns false(means - * peek()==null) or scanner's peek row is bigger than row of given key, seek - * the scanner to the previous row of given key - */ - @Override - public synchronized boolean backwardSeek(Cell key) { - seek(key); - if (peek() == null || comparator.compareRows(peek(), key) > 0) { - return seekToPreviousRow(key); - } - return true; - } - - /** - * Separately get the KeyValue before the specified key from kvset and - * snapshotset, and use the row of higher one as the previous row of - * specified key, then seek to the first KeyValue of previous row - */ - @Override - public synchronized boolean seekToPreviousRow(Cell key) { - Cell firstKeyOnRow = CellUtil.createFirstOnRow(key); - SortedSet cellHead = cellSetAtCreation.headSet(firstKeyOnRow); - Cell cellSetBeforeRow = cellHead.isEmpty() ? null : cellHead.last(); - SortedSet snapshotHead = snapshotAtCreation - .headSet(firstKeyOnRow); - Cell snapshotBeforeRow = snapshotHead.isEmpty() ? null : snapshotHead - .last(); - Cell lastCellBeforeRow = getHighest(cellSetBeforeRow, snapshotBeforeRow); - if (lastCellBeforeRow == null) { - theNext = null; - return false; - } - Cell firstKeyOnPreviousRow = CellUtil.createFirstOnRow(lastCellBeforeRow); - this.stopSkippingCellsIfNextRow = true; - seek(firstKeyOnPreviousRow); - this.stopSkippingCellsIfNextRow = false; - if (peek() == null - || comparator.compareRows(peek(), firstKeyOnPreviousRow) > 0) { - return seekToPreviousRow(lastCellBeforeRow); - } - return true; - } - - @Override - public synchronized boolean seekToLastRow() { - Cell first = cellSetAtCreation.isEmpty() ? null : cellSetAtCreation - .last(); - Cell second = snapshotAtCreation.isEmpty() ? null - : snapshotAtCreation.last(); - Cell higherCell = getHighest(first, second); - if (higherCell == null) { - return false; - } - Cell firstCellOnLastRow = CellUtil.createFirstOnRow(higherCell); - if (seek(firstCellOnLastRow)) { - return true; - } else { - return seekToPreviousRow(higherCell); - } - - } + @Override public boolean isMemStoreInCompaction() { + return false; } - public final static long FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT - + (9 * ClassSize.REFERENCE) + (3 * Bytes.SIZEOF_LONG) + Bytes.SIZEOF_BOOLEAN); - - public final static long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD + - ClassSize.ATOMIC_LONG + (2 * ClassSize.TIMERANGE_TRACKER) + - (2 * ClassSize.CELL_SKIPLIST_SET) + (2 * ClassSize.CONCURRENT_SKIPLISTMAP)); - - /* - * Calculate how the MemStore size has changed. Includes overhead of the - * backing Map. - * @param cell - * @param notpresent True if the cell was NOT present in the set. 
- * @return Size - */ - static long heapSizeChange(final Cell cell, final boolean notpresent) { - return notpresent ? ClassSize.align(ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY - + CellUtil.estimatedHeapSizeOf(cell)) : 0; + @Override public void flushInMemory(long flushOpSeqId) { + return; } - private long keySize() { - return heapSize() - DEEP_OVERHEAD; + @Override public void updateLowestUnflushedSequenceIdInWal(boolean onlyIfGreater) { + return; } /** - * Get the entire heap usage for this MemStore not including keys in the - * snapshot. + * @return Total memory occupied by this MemStore. */ @Override - public long heapSize() { - return size.get(); - } - - @Override public long size() { return heapSize(); } @@ -975,9 +217,9 @@ public class DefaultMemStore implements MemStore { LOG.info("memstore2 estimated size=" + size); final int seconds = 30; LOG.info("Waiting " + seconds + " seconds while heap dump is taken"); - for (int i = 0; i < seconds; i++) { + //for (int i = 0; i < seconds; i++) { // Thread.sleep(1000); - } + //} LOG.info("Exiting."); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushAllStoresPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushAllStoresPolicy.java index 0058104..a9a8113 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushAllStoresPolicy.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushAllStoresPolicy.java @@ -17,10 +17,10 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.util.Collection; - import org.apache.hadoop.hbase.classification.InterfaceAudience; +import java.util.Collection; + /** * A {@link FlushPolicy} that always flushes all stores for a given region. */ @@ -29,7 +29,7 @@ public class FlushAllStoresPolicy extends FlushPolicy { @Override public Collection selectStoresToFlush() { - return region.stores.values(); + return allStoresExcludingFlushInMemory(); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java index 328e890..736d710 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java @@ -17,15 +17,15 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + /** * A {@link FlushPolicy} that only flushes store larger a given threshold. If no store is large * enough, then all stores will be flushed. @@ -90,16 +90,17 @@ public class FlushLargeStoresPolicy extends FlushPolicy { Collection stores = region.stores.values(); Set specificStoresToFlush = new HashSet(); for (Store store : stores) { - if (shouldFlush(store)) { + if (shouldFlush(store) && !shouldFlushInMemory(store)) { specificStoresToFlush.add(store); } } // Didn't find any CFs which were above the threshold for selection. 
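// Illustrative sketch only (hypothetical Store interface, not HBase code): the
// partitioning that the reworked flush policies perform. Stores that elect an
// in-memory flush are selected separately; a flush to disk then covers only the
// remaining stores, which is what allStoresExcludingFlushInMemory() returns.
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

class FlushPartitionSketch {
  interface Store {
    boolean shouldFlushInMemory();
  }

  static List<Store> selectStoresToFlushInMemory(Collection<Store> stores) {
    List<Store> inMemory = new ArrayList<>();
    for (Store s : stores) {
      if (s.shouldFlushInMemory()) {
        inMemory.add(s);                        // these stores flush within memory
      }
    }
    return inMemory;
  }

  static List<Store> allStoresExcludingFlushInMemory(Collection<Store> stores) {
    Collection<Store> inMemory = selectStoresToFlushInMemory(stores);
    List<Store> toDisk = new ArrayList<>();
    for (Store s : stores) {
      if (!inMemory.contains(s)) {
        toDisk.add(s);                          // everything else is flushed to disk as before
      }
    }
    return toDisk;
  }
}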
if (specificStoresToFlush.isEmpty()) { if (LOG.isDebugEnabled()) { - LOG.debug("Since none of the CFs were above the size, flushing all."); + LOG.debug("Since none of the CFs were above the size, flushing all that are not flushed " + + "in memory."); } - return stores; + return allStoresExcludingFlushInMemory(); } else { return specificStoresToFlush; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushPolicy.java index d581fee..f9457ba 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushPolicy.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushPolicy.java @@ -17,11 +17,14 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.util.Collection; - import org.apache.hadoop.conf.Configured; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.Set; + /** * A flush policy determines the stores that need to be flushed when flushing a region. */ @@ -46,4 +49,32 @@ public abstract class FlushPolicy extends Configured { */ public abstract Collection selectStoresToFlush(); + /** + * @return the stores need to be flushed in memory. + */ + public Collection selectStoresToFlushInMemory() { + Collection stores = region.stores.values(); + Set specificStoresToFlushInMemory = new HashSet(); + for (Store store : stores) { + if (shouldFlushInMemory(store)) { + specificStoresToFlushInMemory.add(store); + } + } + return specificStoresToFlushInMemory; + } + + protected boolean shouldFlushInMemory(Store store) { + return store.shouldFlushInMemory(); + } + + protected Collection allStoresExcludingFlushInMemory() { + Collection res = new LinkedList(); + Collection specificStoresToFlushInMemory = selectStoresToFlushInMemory(); + for (Store s : region.stores.values()) { + if(!specificStoresToFlushInMemory.contains(s)) { + res.add(s); + } + } + return res; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java index c7e155a..0b700d3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java @@ -32,8 +32,10 @@ public interface FlushRequester { * @param region the Region requesting the cache flush * @param forceFlushAllStores whether we want to flush all stores. e.g., when request from log * rolling. 
+ * @param forceFlushForCompacted whether we want to flush also the compacted memstores to disk, + * e.g., when request from log rolling */ - void requestFlush(Region region, boolean forceFlushAllStores); + void requestFlush(Region region, boolean forceFlushAllStores, boolean forceFlushForCompacted); /** * Tell the listener the cache needs to be flushed after a delay diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java index faf6d81..11c0352 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java @@ -17,16 +17,6 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.NavigableSet; -import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -64,6 +54,16 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.IdLock; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; + /** * The store implementation to save MOBs (medium objects), it extends the HStore. * When a descriptor of a column family has the value "IS_MOB", it means this column family @@ -511,6 +511,10 @@ public class HMobStore extends HStore { } } + @Override public void flushInMemory(long flushOpSeqId) { } + + @Override public void updateLowestUnflushedSequenceIdInWal() { } + public void updateCellsCountCompactedToMob(long count) { cellsCountCompactedToMob += count; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index a8ffa8d..fa01658 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -1,5 +1,4 @@ -/* - * +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,96 +17,24 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.EOFException; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.lang.reflect.Constructor; -import java.text.ParseException; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; -import java.util.Map; -import java.util.Map.Entry; -import java.util.NavigableMap; -import java.util.NavigableSet; -import java.util.RandomAccess; -import java.util.Set; -import java.util.TreeMap; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.io.Closeables; +import com.google.protobuf.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.CellScanner; -import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.CompoundConfiguration; -import org.apache.hadoop.hbase.DoNotRetryIOException; -import org.apache.hadoop.hbase.DroppedSnapshotException; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.HConstants.OperationStatusCode; -import org.apache.hadoop.hbase.HDFSBlocksDistribution; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; -import org.apache.hadoop.hbase.NamespaceDescriptor; -import org.apache.hadoop.hbase.NotServingRegionException; -import org.apache.hadoop.hbase.RegionTooBusyException; -import org.apache.hadoop.hbase.ShareableMemory; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.Tag; -import org.apache.hadoop.hbase.TagRewriteCell; -import 
org.apache.hadoop.hbase.TagType; -import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.client.Append; -import org.apache.hadoop.hbase.client.Delete; -import org.apache.hadoop.hbase.client.Durability; -import org.apache.hadoop.hbase.client.Get; -import org.apache.hadoop.hbase.client.Increment; -import org.apache.hadoop.hbase.client.IsolationLevel; -import org.apache.hadoop.hbase.client.Mutation; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.RegionReplicaUtil; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.RowMutations; -import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.conf.ConfigurationManager; import org.apache.hadoop.hbase.conf.PropagatingConfigurationObserver; import org.apache.hadoop.hbase.coprocessor.RegionObserver; @@ -158,20 +85,7 @@ import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.apache.hadoop.hbase.regionserver.wal.WALUtil; import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.snapshot.SnapshotManifest; -import org.apache.hadoop.hbase.util.ByteStringer; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.CancelableProgressable; -import org.apache.hadoop.hbase.util.ClassSize; -import org.apache.hadoop.hbase.util.CompressionTest; -import org.apache.hadoop.hbase.util.Counter; -import org.apache.hadoop.hbase.util.EncryptionTest; -import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; -import org.apache.hadoop.hbase.util.FSTableDescriptors; -import org.apache.hadoop.hbase.util.FSUtils; -import org.apache.hadoop.hbase.util.HashedBytes; -import org.apache.hadoop.hbase.util.Pair; -import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; -import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.util.*; import org.apache.hadoop.hbase.wal.WAL; import org.apache.hadoop.hbase.wal.WALFactory; import org.apache.hadoop.hbase.wal.WALKey; @@ -182,19 +96,21 @@ import org.apache.hadoop.util.StringUtils; import org.apache.htrace.Trace; import org.apache.htrace.TraceScope; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.io.Closeables; -import com.google.protobuf.ByteString; -import com.google.protobuf.Descriptors; -import com.google.protobuf.Message; -import com.google.protobuf.RpcCallback; -import com.google.protobuf.RpcController; -import com.google.protobuf.Service; -import com.google.protobuf.TextFormat; +import java.io.EOFException; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.lang.reflect.Constructor; +import java.text.ParseException; +import java.util.*; +import java.util.Map.Entry; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; @InterfaceAudience.Private public class HRegion implements HeapSize, PropagatingConfigurationObserver, 
Region { @@ -269,7 +185,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // TODO: account for each registered handler in HeapSize computation private Map coprocessorServiceHandlers = Maps.newHashMap(); - public final AtomicLong memstoreSize = new AtomicLong(0); + private final AtomicLong memstoreActiveSize = new AtomicLong(0); // size of active set in memstore + // size of additional memstore buckets, e.g., in compaction pipeline + private final AtomicLong memstoreAdditionalSize = new AtomicLong(0); // Debug possible data loss due to WAL off final Counter numMutationsWithoutWAL = new Counter(); @@ -570,6 +488,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi private long flushCheckInterval; // flushPerChanges is to prevent too many changes in memstore private long flushPerChanges; + // force flush size is set to be the average of flush size and blocking size + private long memStoreForceFlushSize; private long blockingMemStoreSize; final long threadWakeFrequency; // Used to guard closes @@ -756,6 +676,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi this.blockingMemStoreSize = this.memstoreFlushSize * conf.getLong(HConstants.HREGION_MEMSTORE_BLOCK_MULTIPLIER, HConstants.DEFAULT_HREGION_MEMSTORE_BLOCK_MULTIPLIER); + // set force flush size to be between flush size and blocking size + this.memStoreForceFlushSize = (this.memstoreFlushSize + this.blockingMemStoreSize) / 2; } /** @@ -1075,9 +997,18 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi if (this.rsAccounting != null) { rsAccounting.addAndGetGlobalMemstoreSize(memStoreSize); } - return this.memstoreSize.addAndGet(memStoreSize); + return this.memstoreActiveSize.addAndGet(memStoreSize); + } + + public long addAndGetGlobalMemstoreAdditionalSize(long size) { + if (this.rsAccounting != null) { + rsAccounting.addAndGetGlobalMemstoreAdditionalSize(size); + } + return this.memstoreAdditionalSize.addAndGet(size); } + + @Override public HRegionInfo getRegionInfo() { return this.fs.getRegionInfo(); @@ -1113,7 +1044,16 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @Override public long getMemstoreSize() { - return memstoreSize.get(); + return memstoreActiveSize.get(); + } + + private long getMemstoreAdditionalSize() { + return memstoreAdditionalSize.get(); + } + + @Override + public long getMemstoreTotalSize() { + return getMemstoreSize() + getMemstoreAdditionalSize(); } @Override @@ -1406,7 +1346,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // Don't flush the cache if we are aborting if (!abort && canFlush) { int flushCount = 0; - while (this.memstoreSize.get() > 0) { + while (this.getMemstoreTotalSize() > 0) { try { if (flushCount++ > 0) { int actualFlushes = flushCount - 1; @@ -1415,7 +1355,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // so we do not lose data throw new DroppedSnapshotException("Failed clearing memory after " + actualFlushes + " attempts on region: " + - Bytes.toStringBinary(getRegionInfo().getRegionName())); + Bytes.toStringBinary(getRegionInfo().getRegionName()) + + " memstore size: " + getMemstoreSize() + " total size (memstore + pipeline)" + + ": " + getMemstoreTotalSize()); } LOG.info("Running extra flush, " + actualFlushes + " (carrying snapshot?) 
" + this); @@ -1449,7 +1391,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi getRegionServerServices().abort("Assertion failed while closing store " + getRegionInfo().getRegionNameAsString() + " " + store + ". flushableSize expected=0, actual= " + flushableSize - + ". Current memstoreSize=" + getMemstoreSize() + ". Maybe a coprocessor " + + ". Current memstoreActiveSize=" + getMemstoreSize() + ". Maybe a coprocessor " + "operation failed and left the memstore in a partially updated state.", null); } completionService @@ -1487,10 +1429,12 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } this.closed.set(true); + if (getMemstoreTotalSize() != 0) LOG.error(getRegionInfo().getEncodedName()+" Memstore size" + + " is " + getMemstoreTotalSize()); if (!canFlush) { - addAndGetGlobalMemstoreSize(-memstoreSize.get()); - } else if (memstoreSize.get() != 0) { - LOG.error("Memstore size is " + memstoreSize.get()); + addAndGetGlobalMemstoreSize(-memstoreActiveSize.get()); + } else if (memstoreActiveSize.get() != 0) { + LOG.error(getRegionInfo().getEncodedName()+" Memstore size is " + memstoreActiveSize.get()); } if (coprocessorHost != null) { status.setStatus("Running coprocessor post-close hooks"); @@ -1575,7 +1519,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * @return True if its worth doing a flush before we put up the close flag. */ private boolean worthPreFlushing() { - return this.memstoreSize.get() > + return this.memstoreActiveSize.get() > this.conf.getLong("hbase.hregion.preclose.flush.size", 1024 * 1024 * 5); } @@ -1839,10 +1783,25 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } @Override - public FlushResult flush(boolean force) throws IOException { - return flushcache(force, false); + public FlushResult flush(boolean forceFlushAllStores, + boolean forceDiskFlushInsteadOfInMemoryFlush) throws IOException { + boolean writeFlushRequestWalMarker = false; + return flushcache(forceFlushAllStores, writeFlushRequestWalMarker, + forceDiskFlushInsteadOfInMemoryFlush); } + @Override + public FlushResult flush(boolean forceFlushAllStores) throws IOException { + boolean writeFlushRequestWalMarker = false; + return flushcache(forceFlushAllStores, writeFlushRequestWalMarker); + } + + public FlushResult flushcache(boolean forceFlushAllStores, boolean writeFlushRequestWalMarker) + throws IOException { + boolean forceDiskFlushInsteadOfInMemoryFlush = true; + return flushcache(forceFlushAllStores, writeFlushRequestWalMarker, + forceDiskFlushInsteadOfInMemoryFlush); + } /** * Flush the cache. * @@ -1865,8 +1824,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * because a Snapshot was not properly persisted. The region is put in closing mode, and the * caller MUST abort after this. 
*/ - public FlushResult flushcache(boolean forceFlushAllStores, boolean writeFlushRequestWalMarker) - throws IOException { + public FlushResult flushcache(boolean forceFlushAllStores, boolean writeFlushRequestWalMarker, + boolean forceDiskFlushInsteadOfInMemoryFlush) throws IOException { // fail-fast instead of waiting on the lock if (this.closing.get()) { String msg = "Skipping flush on " + this + " because closing"; @@ -1912,10 +1871,17 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } try { - Collection specificStoresToFlush = + if(forceDiskFlushInsteadOfInMemoryFlush) { + for(Store s : stores.values()) { + s.setForceFlushToDisk(); + } + } + Collection specificStoresToFlushToDisk = forceFlushAllStores ? stores.values() : flushPolicy.selectStoresToFlush(); - FlushResult fs = internalFlushcache(specificStoresToFlush, - status, writeFlushRequestWalMarker); + Collection specificStoresToFlushInMemory = + forceFlushAllStores ? Collections.EMPTY_SET : flushPolicy.selectStoresToFlushInMemory(); + FlushResult fs = internalFlushcache(specificStoresToFlushToDisk, + specificStoresToFlushInMemory, status, writeFlushRequestWalMarker); if (coprocessorHost != null) { status.setStatus("Running post-flush coprocessor hooks"); @@ -2009,22 +1975,28 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi /** * Flushing all stores. * - * @see #internalFlushcache(Collection, MonitoredTask, boolean) + * @see #internalFlushcache(java.util.Collection, java.util.Collection, + * org.apache.hadoop.hbase.monitoring.MonitoredTask, boolean) */ private FlushResult internalFlushcache(MonitoredTask status) throws IOException { - return internalFlushcache(stores.values(), status, false); + for(Store s : stores.values()) { + s.setForceFlushToDisk(); + } + return internalFlushcache(stores.values(), Collections.EMPTY_SET, status, false); } /** * Flushing given stores. * - * @see #internalFlushcache(WAL, long, Collection, MonitoredTask, boolean) + * @see #internalFlushcache(org.apache.hadoop.hbase.wal.WAL, long, java.util.Collection, + * java.util.Collection, org.apache.hadoop.hbase.monitoring.MonitoredTask, boolean) */ - private FlushResult internalFlushcache(final Collection storesToFlush, - MonitoredTask status, boolean writeFlushWalMarker) throws IOException { - return internalFlushcache(this.wal, HConstants.NO_SEQNUM, storesToFlush, - status, writeFlushWalMarker); + private FlushResult internalFlushcache(final Collection storesToFlushToDisk, + Collection storesToFlushInMemory, MonitoredTask status, + boolean writeFlushWalMarker) throws IOException { + return internalFlushcache(this.wal, HConstants.NO_SEQNUM, storesToFlushToDisk, + storesToFlushInMemory, status, writeFlushWalMarker); } /** @@ -2046,8 +2018,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * @param myseqid * The seqid to use if wal is null writing out flush * file. - * @param storesToFlush + * @param storesToFlushToDisk * The list of stores to flush. + * @param storesToFlushInMemory * @return object describing the flush's state * @throws IOException * general io exceptions @@ -2056,19 +2029,23 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * properly persisted. 
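// Illustrative sketch only (hypothetical single interface; in the patch these
// calls are split between Store and StoreFlushContext): the shape of the
// reworked flush prepare. The flush now receives two collections of stores:
// stores flushed in memory only push their active segment into the in-memory
// compaction pipeline, while the remaining stores take a snapshot that is then
// written to disk and committed.
import java.util.Collection;

class TwoPhaseFlushSketch {
  interface Store {
    void flushInMemory(long flushOpSeqId);      // move the active segment into the pipeline
    void prepareFlushToDisk(long flushOpSeqId); // take a snapshot for the disk flush
    void commitFlushToDisk();                   // hypothetical: write and commit the snapshot
  }

  static void flush(Collection<Store> storesToFlushInMemory,
                    Collection<Store> storesToFlushToDisk,
                    long flushOpSeqId) {
    for (Store s : storesToFlushInMemory) {
      s.flushInMemory(flushOpSeqId);            // no HFile is produced for these stores
    }
    for (Store s : storesToFlushToDisk) {
      s.prepareFlushToDisk(flushOpSeqId);       // snapshot taken while updates are locked out
    }
    for (Store s : storesToFlushToDisk) {
      s.commitFlushToDisk();                    // flush the snapshots and commit the files
    }
  }
}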
*/ protected FlushResult internalFlushcache(final WAL wal, final long myseqid, - final Collection storesToFlush, MonitoredTask status, boolean writeFlushWalMarker) + final Collection storesToFlushToDisk, Collection storesToFlushInMemory, + MonitoredTask status, boolean writeFlushWalMarker) throws IOException { PrepareFlushResult result - = internalPrepareFlushCache(wal, myseqid, storesToFlush, status, writeFlushWalMarker); + = internalPrepareFlushCache(wal, myseqid, storesToFlushToDisk, + storesToFlushInMemory, status, writeFlushWalMarker); if (result.result == null) { - return internalFlushCacheAndCommit(wal, status, result, storesToFlush); + return internalFlushCacheAndCommit(wal, status, result, storesToFlushToDisk); } else { return result.result; // early exit due to failure from prepare stage } } protected PrepareFlushResult internalPrepareFlushCache(final WAL wal, final long myseqid, - final Collection storesToFlush, MonitoredTask status, boolean writeFlushWalMarker) + final Collection storesToFlushToDisk, + Collection storesToFlushInMemory, MonitoredTask status, + boolean writeFlushWalMarker) throws IOException { if (this.rsServices != null && this.rsServices.isAborted()) { // Don't flush when server aborting, it's unsafe @@ -2076,13 +2053,16 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } final long startTime = EnvironmentEdgeManager.currentTime(); // If nothing to flush, return, but we need to safely update the region sequence id - if (this.memstoreSize.get() <= 0) { + if (getMemstoreTotalSize() <= 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("Empty memstore size for the current region " + this); + } // Take an update lock because am about to change the sequence id and we want the sequence id // to be at the border of the empty memstore. MultiVersionConcurrencyControl.WriteEntry writeEntry = null; this.updatesLock.writeLock().lock(); try { - if (this.memstoreSize.get() <= 0) { + if (this.getMemstoreTotalSize() <= 0) { // Presume that if there are still no edits in the memstore, then there are no edits for // this region out in the WAL subsystem so no need to do any trickery clearing out // edits in the WAL system. Up the sequence number so the resulting flush id is for @@ -2122,15 +2102,15 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi if (LOG.isInfoEnabled()) { // Log a fat line detailing what is being flushed. StringBuilder perCfExtras = null; - if (!isAllFamilies(storesToFlush)) { + if (!isAllFamilies(storesToFlushToDisk)) { perCfExtras = new StringBuilder(); - for (Store store: storesToFlush) { + for (Store store: storesToFlushToDisk) { perCfExtras.append("; ").append(store.getColumnFamilyName()); perCfExtras.append("=").append(StringUtils.byteDesc(store.getMemStoreSize())); } } - LOG.info("Flushing " + + storesToFlush.size() + "/" + stores.size() + - " column families, memstore=" + StringUtils.byteDesc(this.memstoreSize.get()) + + LOG.info("Flushing " + + storesToFlushToDisk.size() + "/" + stores.size() + + " column families, memstore=" + StringUtils.byteDesc(this.memstoreActiveSize.get()) + ((perCfExtras != null && perCfExtras.length() > 0)? perCfExtras.toString(): "") + ((wal != null) ? 
"" : "; WAL is null, using passed sequenceid=" + myseqid)); } @@ -2149,7 +2129,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi long totalFlushableSizeOfFlushableStores = 0; Set flushedFamilyNames = new HashSet(); - for (Store store: storesToFlush) { + for (Store store: storesToFlushToDisk) { flushedFamilyNames.add(store.getFamily().getName()); } @@ -2193,7 +2173,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi flushedSeqId = flushOpSeqId = myseqid; } - for (Store s : storesToFlush) { + for (Store s : storesToFlushToDisk) { totalFlushableSizeOfFlushableStores += s.getFlushableSize(); storeFlushCtxs.put(s.getFamily().getName(), s.createFlushContext(flushOpSeqId)); committedFiles.put(s.getFamily().getName(), null); // for writing stores to WAL @@ -2209,9 +2189,13 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi desc, false, mvcc); } - // Prepare flush (take a snapshot) + //flush in memory + for(Store s : storesToFlushInMemory) { + s.flushInMemory(flushOpSeqId); + } + // Prepare flush to disk (take a snapshot) for (StoreFlushContext flush : storeFlushCtxs.values()) { - flush.prepare(); + flush.prepareFlushToDisk(flushOpSeqId); } } catch (IOException ex) { if (wal != null) { @@ -2400,6 +2384,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } // If we get to here, the HStores have been written. + + // stores that do in memory flushes might still have data in memory therefor need to update the + // wal w.r.t. their content + updateLowestUnflushedSequenceIdInWal(storesToFlush); if (wal != null) { wal.completeCacheFlush(this.getRegionInfo().getEncodedNameAsBytes()); } @@ -2419,7 +2407,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } long time = EnvironmentEdgeManager.currentTime() - startTime; - long memstoresize = this.memstoreSize.get(); + long memstoresize = this.memstoreActiveSize.get(); String msg = "Finished memstore flush of ~" + StringUtils.byteDesc(totalFlushableSizeOfFlushableStores) + "/" + totalFlushableSizeOfFlushableStores + ", currentsize=" @@ -2436,6 +2424,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi flushOpSeqId); } + // stores that do in memory flushes might still have data in memory therefor need to update the + // wal w.r.t. their content + private void updateLowestUnflushedSequenceIdInWal(Collection storesToFlush) { + for(Store s :storesToFlush) { + s.updateLowestUnflushedSequenceIdInWal(); + } + } + /** * Method to safely get the next sequence number. * @return Next sequence number unassociated with any actual edit. @@ -2805,10 +2801,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi initialized = true; } long addedSize = doMiniBatchMutation(batchOp); - long newSize = this.addAndGetGlobalMemstoreSize(addedSize); - if (isFlushSize(newSize)) { - requestFlush(); - } + this.addAndGetGlobalMemstoreSize(addedSize); + requestFlushIfNeeded(); } } finally { closeRegionOperation(op); @@ -3595,20 +3589,61 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // If catalog region, do not impose resource constraints or block updates. 
if (this.getRegionInfo().isMetaRegion()) return; - if (this.memstoreSize.get() > this.blockingMemStoreSize) { + long memstoreSize = this.getMemstoreTotalSize(); + // block writes and force flush + if (memstoreSize > this.blockingMemStoreSize) { blockedRequestsCount.increment(); - requestFlush(); + requestAndForceFlush(false); throw new RegionTooBusyException("Above memstore limit, " + "regionName=" + (this.getRegionInfo() == null ? "unknown" : this.getRegionInfo().getRegionNameAsString()) + ", server=" + (this.getRegionServerServices() == null ? "unknown" : this.getRegionServerServices().getServerName()) + - ", memstoreSize=" + memstoreSize.get() + + ", memstoreActiveSize=" + memstoreSize + ", blockingMemStoreSize=" + blockingMemStoreSize); } } /** + * requests flush if the size of all memstores in region exceeds the flush thresholds; force + * the flush if it exceeds the force flush threshold + * @throws RegionTooBusyException + */ + private void requestFlushIfNeeded() throws RegionTooBusyException { + long memstoreSize = this.getMemstoreSize(); + long memstoreTotalSize = this.getMemstoreTotalSize(); // including compaction pipelines + + // force flush + if (memstoreTotalSize > this.memStoreForceFlushSize) { + requestAndForceFlush(true); + return; + } + + // (regular) flush + if (memstoreSize > this.memstoreFlushSize) { + requestFlush(); + } + } + + /** + * request flush. + * If the memstore is not in compaction or we do not need to wait for compactions to end then + * force the flush. + * @param waitForCompactions whether to wait for the compaction to end or to force the flush + * without waiting + */ + private void requestAndForceFlush(boolean waitForCompactions) { + for (Store s : stores.values()) { + if(waitForCompactions && s.isMemStoreInCompaction()) { + // do not force flush if memstore compaction is in progress + continue; + } + s.setForceFlushToDisk(); + } + requestFlush(); + } + + /** * @throws IOException Throws exception if region is in read-only mode. */ protected void checkReadOnly() throws IOException { @@ -3786,7 +3821,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi writestate.flushRequested = true; } // Make request outside of synchronize block; HBASE-818. - this.rsServices.getFlushRequester().requestFlush(this, false); + this.rsServices.getFlushRequester().requestFlush(this, false, false); if (LOG.isDebugEnabled()) { LOG.debug("Flush requested on " + this.getRegionInfo().getEncodedName()); } @@ -3906,7 +3941,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } if (seqid > minSeqIdForTheRegion) { // Then we added some edits to memory. Flush and cleanup split edit files. - internalFlushcache(null, seqid, stores.values(), status, false); + internalFlushcache(null, seqid, stores.values(), Collections.EMPTY_SET, status, false); } // Now delete the content of recovered edits. We're done w/ them. if (files.size() > 0 && this.conf.getBoolean("hbase.region.archive.recovered.edits", false)) { @@ -4073,7 +4108,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi editsCount++; } if (flush) { - internalFlushcache(null, currentEditSeqId, stores.values(), status, false); + internalFlushcache(null, currentEditSeqId, stores.values(), + Collections.EMPTY_SET, status, false);//force flush } if (coprocessorHost != null) { @@ -4265,7 +4301,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // invoke prepareFlushCache. 
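The flush-request logic above reduces to a two-threshold decision: the active memstore size is compared against the regular flush threshold, while the total size (active data plus the compaction pipeline) is compared against a larger force-flush threshold, and forcing skips any store whose memstore is currently being compacted in memory. A minimal, self-contained sketch of that decision follows; the names are simplified stand-ins, not the patch's actual types.

// Illustrative decision logic only; thresholds and the store API are simplified.
interface FlushableStore {
  boolean isMemStoreInCompaction();
  void setForceFlushToDisk();
}

class FlushDecisionSketch {
  private final long flushSize;       // regular threshold, checked against active data only
  private final long forceFlushSize;  // larger threshold, checked against active + pipeline

  FlushDecisionSketch(long flushSize, long forceFlushSize) {
    this.flushSize = flushSize;
    this.forceFlushSize = forceFlushSize;
  }

  /** Returns true when a region flush should be requested. */
  boolean maybeRequestFlush(long activeSize, long totalSize,
      Iterable<FlushableStore> stores, boolean waitForCompactions) {
    if (totalSize > forceFlushSize) {
      for (FlushableStore s : stores) {
        // do not force a store whose memstore is being compacted in memory
        if (!(waitForCompactions && s.isMemStoreInCompaction())) {
          s.setForceFlushToDisk();
        }
      }
      return true;
    }
    return activeSize > flushSize;    // plain flush request, nothing forced
  }
}

In the patch the same choice is made by requestFlushIfNeeded() and requestAndForceFlush(), with the thresholds held as HRegion fields.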
Send null as wal since we do not want the flush events in wal PrepareFlushResult prepareResult = internalPrepareFlushCache(null, - flushSeqId, storesToFlush, status, false); + flushSeqId, storesToFlush, Collections.EMPTY_SET, status, false); if (prepareResult.result == null) { // save the PrepareFlushResult so that we can use it later from commit flush this.writestate.flushing = true; @@ -4537,7 +4573,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi long snapshotSize = s.getFlushableSize(); this.addAndGetGlobalMemstoreSize(-snapshotSize); StoreFlushContext ctx = s.createFlushContext(currentSeqId); - ctx.prepare(); + ctx.prepareFlushToDisk(currentSeqId); ctx.abort(); return snapshotSize; } @@ -5526,7 +5562,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } @Override - public synchronized boolean next(List outResults, ScannerContext scannerContext) throws IOException { + public synchronized boolean next(List outResults, ScannerContext scannerContext) + throws IOException { if (this.filterClosed) { throw new UnknownScannerException("Scanner was closed (timed out?) " + "after we renewed it. Could be caused by a very slow scanner " + @@ -5959,7 +5996,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi protected boolean isStopRow(Cell currentRowCell) { return currentRowCell == null - || (stopRow != null && comparator.compareRows(currentRowCell, stopRow, 0, stopRow.length) >= isScan); + || (stopRow != null && + comparator.compareRows(currentRowCell, stopRow, 0, stopRow.length) >= isScan); } @Override @@ -6692,7 +6730,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi return null; } ClientProtos.RegionLoadStats.Builder stats = ClientProtos.RegionLoadStats.newBuilder(); - stats.setMemstoreLoad((int) (Math.min(100, (this.memstoreSize.get() * 100) / this + stats.setMemstoreLoad((int) (Math.min(100, (this.getMemstoreTotalSize() * 100) / this .memstoreFlushSize))); stats.setHeapOccupancy((int)rsServices.getHeapMemoryManager().getHeapOccupancyPercent()*100); return stats.build(); @@ -6870,9 +6908,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } finally { closeRegionOperation(); - if (!mutations.isEmpty() && - isFlushSize(this.addAndGetGlobalMemstoreSize(addedSize))) { - requestFlush(); + if (!mutations.isEmpty()) { + this.addAndGetGlobalMemstoreSize(addedSize); + requestFlushIfNeeded(); } } } @@ -7109,6 +7147,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi //store the kvs to the temporary memstore before writing WAL tempMemstore.put(store, kvs); } + this.addAndGetGlobalMemstoreSize(size); // Actually write to WAL now if (walEdits != null && !walEdits.isEmpty()) { @@ -7196,10 +7235,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi this.metricsRegion.updateAppend(); } - if (flush) { - // Request a cache flush. Do it outside update lock. - requestFlush(); - } + // Request a cache flush. Do it outside update lock. + requestFlushIfNeeded(); return mutate.isReturnResults() ? 
Result.create(allKVs) : null; } @@ -7387,8 +7424,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } } } - size = this.addAndGetGlobalMemstoreSize(size); - flush = isFlushSize(size); + this.addAndGetGlobalMemstoreSize(size); } } finally { this.updatesLock.readLock().unlock(); @@ -7421,10 +7457,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } } - if (flush) { - // Request a cache flush. Do it outside update lock. - requestFlush(); - } + // Request a cache flush. Do it outside update lock. + requestFlushIfNeeded(); + return mutation.isReturnResults() ? Result.create(allKVs) : null; } @@ -7444,7 +7479,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final long FIXED_OVERHEAD = ClassSize.align( ClassSize.OBJECT + ClassSize.ARRAY + - 43 * ClassSize.REFERENCE + 3 * Bytes.SIZEOF_INT + + 46 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + (14 * Bytes.SIZEOF_LONG) + 5 * Bytes.SIZEOF_BOOLEAN); @@ -8071,4 +8106,28 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi return this.getRegionInfo().isMetaRegion() ? CellComparator.META_COMPARATOR : CellComparator.COMPARATOR; } + + + //// method for debugging tests + public void throwException(String title, String regionName) { + String msg = title+", "; + msg += getRegionInfo().toString(); + msg += getRegionInfo().isMetaRegion() ? " meta region " : " "; + msg += getRegionInfo().isMetaTable() ? " meta table " : " "; + msg += "stores: "; + for(Store s : getStores()) { + msg += s.getFamily().getNameAsString(); + msg += " size: "; + msg += s.getMemStoreSize(); + msg += " "; + } + msg += "end-of-stores"; + msg += ", memstore size "; + msg += getMemstoreSize(); + msg += ", total memstore size "; + msg += getMemstoreTotalSize(); + if(getRegionInfo().getRegionNameAsString().startsWith(regionName)) { + throw new RuntimeException(msg); + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 59d13fa..0c95508 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -18,40 +18,16 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.lang.Thread.UncaughtExceptionHandler; -import java.lang.management.ManagementFactory; -import java.lang.management.MemoryUsage; -import java.lang.reflect.Constructor; -import java.net.BindException; -import java.net.InetAddress; -import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.ReentrantReadWriteLock; - -import javax.management.MalformedObjectNameException; -import javax.management.ObjectName; -import 
javax.servlet.http.HttpServlet; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import com.google.protobuf.BlockingRpcChannel; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Message; +import com.google.protobuf.RpcCallback; +import com.google.protobuf.RpcController; +import com.google.protobuf.Service; +import com.google.protobuf.ServiceException; import org.apache.commons.lang.math.RandomUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -176,16 +152,38 @@ import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.data.Stat; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; -import com.google.protobuf.BlockingRpcChannel; -import com.google.protobuf.Descriptors; -import com.google.protobuf.Message; -import com.google.protobuf.RpcCallback; -import com.google.protobuf.RpcController; -import com.google.protobuf.Service; -import com.google.protobuf.ServiceException; +import javax.management.MalformedObjectNameException; +import javax.management.ObjectName; +import javax.servlet.http.HttpServlet; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.lang.Thread.UncaughtExceptionHandler; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryUsage; +import java.lang.reflect.Constructor; +import java.net.BindException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReentrantReadWriteLock; /** * HRegionServer makes a set of HRegions available to clients. 
It checks in with @@ -667,7 +665,8 @@ public class HRegionServer extends HasThread implements coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance); if (LOG.isDebugEnabled()) { - LOG.debug("Registered regionserver coprocessor service: service=" + serviceDesc.getFullName()); + LOG.debug("Registered regionserver coprocessor service: service=" + + serviceDesc.getFullName()); } return true; } @@ -1437,7 +1436,7 @@ public class HRegionServer extends HasThread implements int storefiles = 0; int storeUncompressedSizeMB = 0; int storefileSizeMB = 0; - int memstoreSizeMB = (int) (r.getMemstoreSize() / 1024 / 1024); + int memstoreSizeMB = (int) (r.getMemstoreTotalSize() / 1024 / 1024); int storefileIndexSizeMB = 0; int rootIndexSizeKB = 0; int totalStaticIndexSizeKB = 0; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index cfda1c6..bb98f75 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -18,30 +18,12 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.net.InetSocketAddress; -import java.security.Key; -import java.security.KeyException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableSet; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.Future; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.locks.ReentrantReadWriteLock; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableCollection; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -78,9 +60,9 @@ import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; +import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController; import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; import org.apache.hadoop.hbase.regionserver.compactions.OffPeakHours; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController; import org.apache.hadoop.hbase.regionserver.wal.WALUtil; import org.apache.hadoop.hbase.security.EncryptionUtil; import org.apache.hadoop.hbase.security.User; @@ -94,12 +76,29 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix; -import com.google.common.annotations.VisibleForTesting; -import 
com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableCollection; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.InetSocketAddress; +import java.security.Key; +import java.security.KeyException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableSet; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; /** * A Store holds a column family in a Region. Its a memstore and a set of zero @@ -125,7 +124,7 @@ public class HStore implements Store { private static final Log LOG = LogFactory.getLog(HStore.class); - protected final MemStore memstore; + protected final AbstractMemStore memstore; // This stores directory in the filesystem. protected final HRegion region; private final HColumnDescriptor family; @@ -232,9 +231,13 @@ public class HStore implements Store { // Why not just pass a HColumnDescriptor in here altogether? Even if have // to clone it? scanInfo = new ScanInfo(family, ttl, timeToPurgeDeletes, this.comparator); - String className = conf.get(MEMSTORE_CLASS_NAME, DefaultMemStore.class.getName()); - this.memstore = ReflectionUtils.instantiateWithCustomCtor(className, new Class[] { - Configuration.class, CellComparator.class }, new Object[] { conf, this.comparator }); + if(family.isInMemory()) { + this.memstore = new CompactedMemStore(conf, this.comparator, this); + } else { + String className = conf.get(MEMSTORE_CLASS_NAME, DefaultMemStore.class.getName()); + this.memstore = ReflectionUtils.instantiateWithCustomCtor(className, new Class[] { + Configuration.class, CellComparator.class }, new Object[] { conf, this.comparator }); + } this.offPeakHours = OffPeakHours.getInstance(conf); // Setting up cache configuration for this family @@ -903,7 +906,7 @@ public class HStore implements Store { void snapshot() { this.lock.writeLock().lock(); try { - this.memstore.snapshot(); + this.memstore.snapshot(0); } finally { this.lock.writeLock().unlock(); } @@ -1992,8 +1995,6 @@ public class HStore implements Store { } /** - * Used in tests. TODO: Remove - * * Updates the value for the given row/family/qualifier. This function will always be seen as * atomic by other readers because it only puts a single KV to memstore. Thus no read/write * control necessary. @@ -2004,6 +2005,7 @@ public class HStore implements Store { * @return memstore size delta * @throws IOException */ + @VisibleForTesting public long updateColumnValue(byte [] row, byte [] f, byte [] qualifier, long newValue) throws IOException { @@ -2054,10 +2056,11 @@ public class HStore implements Store { /** * This is not thread safe. The caller should have a lock on the region or the store. 
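In the HStore constructor above, the memstore implementation is now chosen per column family: families flagged as in-memory get a CompactedMemStore, all others keep the class named by MEMSTORE_CLASS_NAME and are instantiated reflectively. A hedged sketch of the same selection with placeholder types (not the real HStore constructor):

// Illustrative only: MemStoreSketch and its two implementations are made-up stand-ins.
interface MemStoreSketch { }
class DefaultMemStoreSketch implements MemStoreSketch { }
class CompactedMemStoreSketch implements MemStoreSketch { }

class MemStoreChooser {
  static MemStoreSketch create(boolean familyIsInMemory, String configuredClassName)
      throws ReflectiveOperationException {
    if (familyIsInMemory) {
      // IN_MEMORY column families get the compacting memstore
      return new CompactedMemStoreSketch();
    }
    // everything else keeps the configurable default, instantiated reflectively
    return (MemStoreSketch) Class.forName(configuredClassName)
        .getDeclaredConstructor().newInstance();
  }
}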
* If necessary, the lock can be added with the patch provided in HBASE-10087 + * @param flushOpSeqId the sequence id that is attached to the flush operation in the wal */ @Override - public void prepare() { - this.snapshot = memstore.snapshot(); + public void prepareFlushToDisk(long flushOpSeqId) { + this.snapshot = memstore.snapshot(flushOpSeqId); this.cacheFlushCount = snapshot.getCellsCount(); this.cacheFlushSize = snapshot.getSize(); committedFiles = new ArrayList(1); @@ -2287,4 +2290,31 @@ public class HStore implements Store { public boolean isPrimaryReplicaStore() { return getRegionInfo().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID; } + + @Override + public void setForceFlushToDisk() { + this.memstore.setForceFlushToDisk(); + } + + @Override + public boolean isMemStoreInCompaction() { + return memstore.isMemStoreInCompaction(); + } + + @Override public boolean shouldFlushInMemory() { + return memstore.shouldFlushInMemory(); + } + + @Override public void flushInMemory(long flushOpSeqId) { + memstore.flushInMemory(flushOpSeqId); + } + + @Override public void updateLowestUnflushedSequenceIdInWal() { + memstore.updateLowestUnflushedSequenceIdInWal(false); //update even if not greater + } + + //method for tests + @Override public boolean isCompactedMemStore() { + return memstore.isCompactedMemStore(); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ImmutableSegment.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ImmutableSegment.java new file mode 100644 index 0000000..c8231bf --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ImmutableSegment.java @@ -0,0 +1,64 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.lang.NotImplementedException; +import org.apache.hadoop.hbase.Cell; + +/** + */ +public abstract class ImmutableSegment extends StoreSegment { + + public ImmutableSegment(StoreSegment segment) { + super(segment); + } + + /** + * Removes the given cell from this segment. + * By default immutable store segment can not rollback + * It may be invoked by tests in specific cases where it is known to be supported {@See + * ImmutableSegmentAdapter} + */ + @Override + public long rollback(Cell cell) { + return 0; + } + + /** + * Returns a set of all the cells in the segment. + * The implementation of this method might be very inefficient for some immutable segments + * that do not maintain a cell set. Therefore by default this method is not supported. 
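updateLowestUnflushedSequenceIdInWal() above exists because a store that flushes in memory can still hold edits after a disk flush, so the WAL has to be told how far it may safely be truncated. A toy sketch of that bookkeeping idea with illustrative names (in the patch the per-family accounting is kept on the WAL side):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Per family, remember the lowest WAL sequence id whose edit is still only in memory;
// the WAL may only archive log files whose entries all fall below the minimum of these.
class LowestUnflushedSeqIdSketch {
  private final Map<String, Long> lowestUnflushedByFamily = new ConcurrentHashMap<>();

  /** Called after a flush: everything below seqId for this family is now durable on disk. */
  void update(String family, long seqId) {
    lowestUnflushedByFamily.merge(family, seqId, Long::max);
  }

  /** Highest sequence id below which the WAL can safely be truncated. */
  long safeTruncationPoint() {
    return lowestUnflushedByFamily.values().stream()
        .mapToLong(Long::longValue).min().orElse(Long.MAX_VALUE);
  }
}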
+ * It may be invoked by tests in specific cases where it is known to be supported {@See + * ImmutableSegmentAdapter} + */ + @Override + public CellSet getCellSet() { + throw new NotImplementedException("Immutable Segment does not support this operation by " + + "default"); + } + + /** + * Builds a special scanner for the MemStoreSnapshot object that may be different than the + * general segment scanner. + * @return a special scanner for the MemStoreSnapshot object + */ + public abstract KeyValueScanner getScannerForMemStoreSnapshot(); + + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ImmutableSegmentAdapter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ImmutableSegmentAdapter.java new file mode 100644 index 0000000..45203a8 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ImmutableSegmentAdapter.java @@ -0,0 +1,93 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.util.CollectionBackedScanner; + +/** + * An immutable memstore segment which wraps and adapts a mutable segment. + * This is used when a mutable segment is moved to being a snapshot or pushed into a compaction + * pipeline, that consists only of immutable segments. 
+ * The compaction may generate different type of mutable segment + */ +public class ImmutableSegmentAdapter extends ImmutableSegment { + + final private MutableSegment adaptee; + + public ImmutableSegmentAdapter(MutableSegment segment) { + super(segment); + this.adaptee = segment; + } + + @Override + public KeyValueScanner getScannerForMemStoreSnapshot() { + return new CollectionBackedScanner(adaptee.getCellSet(), adaptee.getComparator()); + } + + @Override public StoreSegmentScanner getScanner(long readPoint) { + return adaptee.getScanner(readPoint); + } + + @Override public boolean isEmpty() { + return adaptee.isEmpty(); + } + + @Override public int getCellsCount() { + return adaptee.getCellsCount(); + } + + @Override public long add(Cell cell) { + return adaptee.add(cell); + } + + @Override public Cell getFirstAfter(Cell cell) { + return adaptee.getFirstAfter(cell); + } + + @Override public void close() { + adaptee.close(); + } + + @Override public Cell maybeCloneWithAllocator(Cell cell) { + return adaptee.maybeCloneWithAllocator(cell); + } + + @Override public StoreSegment setSize(long size) { + adaptee.setSize(size); + return this; + } + + @Override public long getSize() { + return adaptee.getSize(); + } + + @Override public long rollback(Cell cell) { + return adaptee.rollback(cell); + } + + @Override public CellSet getCellSet() { + return adaptee.getCellSet(); + } + + @Override public void dump(Log log) { + adaptee.dump(log); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java index 40edc05..4cf5212 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java @@ -181,7 +181,10 @@ public class LogRoller extends HasThread { requester = this.services.getFlushRequester(); if (requester != null) { // force flushing all stores to clean old logs - requester.requestFlush(r, true); + // The regions to flush are those whose number of un-archived WAL files + // is greater than maximum allowed. In this case, cause even the compacted + // MemStore flush to disk (last parameter true). + requester.requestFlush(r, true, true); scheduled = true; } } @@ -205,4 +208,4 @@ public class LogRoller extends HasThread { } return true; } -} \ No newline at end of file +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java index d24299d..7d6ef73 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java @@ -17,13 +17,14 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.util.List; - -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.util.Pair; +import java.io.IOException; +import java.util.List; + /** * The MemStore holds in-memory modifications to the Store. Modifications are {@link Cell}s. *

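ImmutableSegmentAdapter above is a plain adapter: it re-exposes a mutable segment through the immutable-segment interface once the segment leaves the active set, delegating every read to the wrapped instance. The same idea in a stripped-down, illustrative form, where a sorted set of strings stands in for the cell set:

import java.util.Iterator;
import java.util.concurrent.ConcurrentSkipListSet;

// Toy model of the adapter: the read-only facade hides the mutating API once the
// segment moves to the snapshot or the compaction pipeline.
interface SegmentView {
  Iterator<String> scanner();
  int cellCount();
}

class MutableSegmentSketch implements SegmentView {
  private final ConcurrentSkipListSet<String> cells = new ConcurrentSkipListSet<>();
  public Iterator<String> scanner() { return cells.iterator(); }
  public int cellCount() { return cells.size(); }
  public boolean add(String cell) { return cells.add(cell); }
}

class ImmutableSegmentAdapterSketch implements SegmentView {
  private final MutableSegmentSketch adaptee;
  ImmutableSegmentAdapterSketch(MutableSegmentSketch segment) { this.adaptee = segment; }
  public Iterator<String> scanner() { return adaptee.scanner(); }   // pure delegation
  public int cellCount() { return adaptee.cellCount(); }
  // note: no add() here; the adapter does not expose mutation
}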
@@ -38,14 +39,15 @@ public interface MemStore extends HeapSize { * Creates a snapshot of the current memstore. Snapshot must be cleared by call to * {@link #clearSnapshot(long)}. * @return {@link MemStoreSnapshot} + * @param flushOpSeqId */ - MemStoreSnapshot snapshot(); + MemStoreSnapshot snapshot(long flushOpSeqId); /** * Clears the current snapshot of the Memstore. * @param id * @throws UnexpectedStateException - * @see #snapshot() + * @see #snapshot(long) */ void clearSnapshot(long id) throws UnexpectedStateException; @@ -130,7 +132,7 @@ public interface MemStore extends HeapSize { * @return scanner over the memstore. This might include scanner over the snapshot when one is * present. */ - List getScanners(long readPt); + List getScanners(long readPt) throws IOException; /** * @return Total memory occupied by this MemStore. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreCompactor.java new file mode 100644 index 0000000..83117d9 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreCompactor.java @@ -0,0 +1,236 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * The ongoing MemStore Compaction manager, dispatches a solo running compaction + * and interrupts the compaction if requested. + * The MemStoreScanner is used to traverse the compaction pipeline. The MemStoreScanner + * is included in internal store scanner, where all compaction logic is implemented. + * Threads safety: It is assumed that the compaction pipeline is immutable, + * therefore no special synchronization is required. 
+ */ +@InterfaceAudience.Private class MemStoreCompactor { + private static final Log LOG = LogFactory.getLog(MemStoreCompactor.class); + + private CompactionPipeline cp; // the subject for compaction + private CompactedMemStore ms; // backward reference + private MemStoreScanner scanner; // scanner for pipeline only + + private StoreScanner compactingScanner; // scanner on top of MemStoreScanner + // that uses ScanQueryMatcher + private Configuration conf; + private long // smallest read point for any ongoing + smallestReadPoint; // MemStore scan + private VersionedSegmentsList // a static version of the CellSetMgrs + versionedList; // list from the pipeline + private final CellComparator comparator; + + private static final ExecutorService pool // Thread pool shared by all scanners + = Executors.newCachedThreadPool(); + private final AtomicBoolean inCompaction = new AtomicBoolean(false); + private final AtomicBoolean isInterrupted = new AtomicBoolean(false); + + /** + * ---------------------------------------------------------------------- + * The constructor is used only to initialize basics, other parameters + * needing to start compaction will come with startCompact() + */ + public MemStoreCompactor(CompactedMemStore ms, CompactionPipeline cp, + CellComparator comparator, Configuration conf) { + + this.ms = ms; + this.cp = cp; + this.comparator = comparator; + this.conf = conf; + } + + /** + * ---------------------------------------------------------------------- + * The request to dispatch the compaction asynchronous task. + * The method returns true if compaction was successfully dispatched, or false if there + * is already an ongoing compaction (or pipeline is empty). + */ + public boolean startCompact(Store store) throws IOException { + if (cp.isEmpty()) return false; // no compaction on empty pipeline + + if (!inCompaction.get()) { // dispatch + List scanners = new ArrayList(); + this.versionedList = // get the list of CellSetMgrs from the pipeline + cp.getVersionedList(); // the list is marked with specific version + + // create the list of scanners with maximally possible read point, meaning that + // all KVs are going to be returned by the pipeline traversing + for (StoreSegment segment : this.versionedList.getStoreSegments()) { + scanners.add(segment.getScanner(Long.MAX_VALUE)); + } + scanner = + new MemStoreScanner(ms, scanners, Long.MAX_VALUE, MemStoreScanner.Type.COMPACT_FORWARD); + + smallestReadPoint = store.getSmallestReadPoint(); + compactingScanner = createScanner(store); + + Runnable worker = new Worker(); + LOG.info("Starting the MemStore in-memory compaction"); + pool.execute(worker); + inCompaction.set(true); + return true; + } + return false; + } + + /*---------------------------------------------------------------------- + * The request to cancel the compaction asynchronous task + * The compaction may still happen if the request was sent too late + * Non-blocking request + */ + public void stopCompact() { + if (inCompaction.get()) isInterrupted.compareAndSet(false, true); + inCompaction.set(false); + } + + public boolean isInCompaction() { + return inCompaction.get(); + } + + /*---------------------------------------------------------------------- + * Close the scanners and clear the pointers in order to allow good + * garbage collection + */ + private void releaseResources() { + isInterrupted.set(false); + scanner.close(); + scanner = null; + compactingScanner.close(); + compactingScanner = null; + versionedList = null; + } + + 
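startCompact()/stopCompact() above implement a single-flight dispatch: an atomic flag guards against concurrent compactions of the same memstore, the worker runs on a shared pool, and an interruption flag lets the caller cancel without blocking. A condensed sketch of that control flow; the compaction work itself is passed in as a Runnable, and compareAndSet is used for the guard, a slightly stricter variant of the check in the patch.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;

// Skeleton of the single-flight dispatch; scanning the versioned pipeline, building the
// compacted segment and swapping it in are all elided behind compactAndSwap.
class InMemoryCompactionDispatchSketch {
  private static final ExecutorService POOL = Executors.newCachedThreadPool();
  private final AtomicBoolean inCompaction = new AtomicBoolean(false);
  private final AtomicBoolean interrupted = new AtomicBoolean(false);

  /** Returns true if a compaction was dispatched, false if one is already running. */
  boolean startCompact(Runnable compactAndSwap) {
    if (!inCompaction.compareAndSet(false, true)) {
      return false;
    }
    POOL.execute(() -> {
      try {
        if (!interrupted.get()) {
          compactAndSwap.run();      // phase I: compact; phase II: swap the pipeline
        }
      } finally {
        interrupted.set(false);
        inCompaction.set(false);
      }
    });
    return true;
  }

  /** Non-blocking cancel; the compaction may still complete if the request arrives late. */
  void stopCompact() {
    interrupted.set(true);
  }
}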
/*---------------------------------------------------------------------- + * The worker thread performs the compaction asynchronously. + * The solo (per compactor) thread only reads the compaction pipeline. + * There is at most one thread per memstore instance. + */ + private class Worker implements Runnable { + + @Override public void run() { + ImmutableSegment result = StoreSegmentFactory.instance() + .createImmutableSegment(conf, comparator, + CompactedMemStore.DEEP_OVERHEAD_PER_PIPELINE_ITEM); + // the compaction processing + KeyValue cell; + try { + // Phase I: create the compacted MutableCellSetSegment + compactSegments(result); + // Phase II: swap the old compaction pipeline + if (!Thread.currentThread().isInterrupted()) { + cp.swap(versionedList, result); + // update the wal so it can be truncated and not get too long + ms.updateLowestUnflushedSequenceIdInWal(true); // only if greater + } + } catch (Exception e) { + Thread.currentThread().interrupt(); + return; + } finally { + stopCompact(); + releaseResources(); + } + + } + } + + /** + * Creates the scanner for compacting the pipeline. + * + * @return the scanner + */ + private StoreScanner createScanner(Store store) throws IOException { + + Scan scan = new Scan(); + scan.setMaxVersions(); //Get all available versions + + StoreScanner internalScanner = + new StoreScanner(store, store.getScanInfo(), scan, Collections.singletonList(scanner), + ScanType.COMPACT_RETAIN_DELETES, smallestReadPoint, HConstants.OLDEST_TIMESTAMP); + + return internalScanner; + } + + /** + * Creates a single StoreSegment using the internal store scanner, + * who in turn uses ScanQueryMatcher + */ + private void compactSegments(StoreSegment result) throws IOException { + + List kvs = new ArrayList(); + // get the limit to the size of the groups to be returned by compactingScanner + int compactionKVMax = conf.getInt( + HConstants.COMPACTION_KV_MAX, + HConstants.COMPACTION_KV_MAX_DEFAULT); + + ScannerContext scannerContext = + ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build(); + + boolean hasMore; + do { + hasMore = compactingScanner.next(kvs, scannerContext); + if (!kvs.isEmpty()) { + for (Cell c : kvs) { + // The scanner is doing all the elimination logic + // now we just copy it to the new segment + KeyValue kv = KeyValueUtil.ensureKeyValue(c); + Cell newKV = result.maybeCloneWithAllocator(kv); + result.add(newKV); + + } + kvs.clear(); + } + } while (hasMore && (!isInterrupted.get())); + } + + // methods for tests + void toggleCompaction(boolean on) { + if (on) { + inCompaction.set(false); + } else { + inCompaction.set(true); + } + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java index 40c5046..2733023 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java @@ -165,7 +165,7 @@ class MemStoreFlusher implements FlushRequester { Region regionToFlush; if (bestFlushableRegion != null && - bestAnyRegion.getMemstoreSize() > 2 * bestFlushableRegion.getMemstoreSize()) { + bestAnyRegion.getMemstoreTotalSize() > 2 * bestFlushableRegion.getMemstoreTotalSize()) { // Even if it's not supposed to be flushed, pick a region if it's more than twice // as big as the best flushable one - otherwise when we're under pressure we make // lots of little flushes and cause lots of compactions, 
etc, which just makes @@ -214,6 +214,7 @@ class MemStoreFlusher implements FlushRequester { + humanReadableInt(regionToFlush.getMemstoreSize())); flushedOne = flushRegion(regionToFlush, true, true); + Preconditions.checkState(regionToFlush.getMemstoreTotalSize() > 0); if (!flushedOne) { LOG.info("Excluding unflushable region " + regionToFlush + " - trying to find a different region to flush."); @@ -356,12 +357,12 @@ class MemStoreFlusher implements FlushRequester { } @Override - public void requestFlush(Region r, boolean forceFlushAllStores) { + public void requestFlush(Region r, boolean forceFlushAllStores, boolean forceFlushForCompacted) { synchronized (regionsInQueue) { if (!regionsInQueue.containsKey(r)) { // This entry has no delay so it will be added at the top of the flush // queue. It'll come out near immediately. - FlushRegionEntry fqe = new FlushRegionEntry(r, forceFlushAllStores); + FlushRegionEntry fqe = new FlushRegionEntry(r, forceFlushAllStores, forceFlushForCompacted); this.regionsInQueue.put(r, fqe); this.flushQueue.add(fqe); } @@ -373,7 +374,7 @@ class MemStoreFlusher implements FlushRequester { synchronized (regionsInQueue) { if (!regionsInQueue.containsKey(r)) { // This entry has some delay - FlushRegionEntry fqe = new FlushRegionEntry(r, forceFlushAllStores); + FlushRegionEntry fqe = new FlushRegionEntry(r, forceFlushAllStores, false); fqe.requeue(delay); this.regionsInQueue.put(r, fqe); this.flushQueue.add(fqe); @@ -487,11 +488,13 @@ class MemStoreFlusher implements FlushRequester { private boolean flushRegion(final Region region, final boolean emergencyFlush, boolean forceFlushAllStores) { long startTime = 0; + boolean forceFlushInsteadOfCompaction = false; synchronized (this.regionsInQueue) { FlushRegionEntry fqe = this.regionsInQueue.remove(region); // Use the start time of the FlushRegionEntry if available if (fqe != null) { startTime = fqe.createTime; + forceFlushInsteadOfCompaction = fqe.forceFlushForCompacted; } if (fqe != null && emergencyFlush) { // Need to remove from region from delay queue. 
When NOT an @@ -508,7 +511,7 @@ class MemStoreFlusher implements FlushRequester { lock.readLock().lock(); try { notifyFlushRequest(region, emergencyFlush); - FlushResult flushResult = region.flush(forceFlushAllStores); + FlushResult flushResult = region.flush(forceFlushAllStores,forceFlushInsteadOfCompaction); boolean shouldCompact = flushResult.isCompactionNeeded(); // We just want to check the size boolean shouldSplit = ((HRegion)region).checkSplit() != null; @@ -726,12 +729,14 @@ class MemStoreFlusher implements FlushRequester { private int requeueCount = 0; private boolean forceFlushAllStores; + private boolean forceFlushForCompacted; - FlushRegionEntry(final Region r, boolean forceFlushAllStores) { + FlushRegionEntry(final Region r, boolean forceFlushAllStores, boolean forceFlushForCompacted) { this.region = r; this.createTime = EnvironmentEdgeManager.currentTime(); this.whenToExpire = this.createTime; this.forceFlushAllStores = forceFlushAllStores; + this.forceFlushForCompacted = forceFlushForCompacted; } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreScanner.java new file mode 100644 index 0000000..dda6df5 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreScanner.java @@ -0,0 +1,317 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.htrace.Trace; + +import java.io.IOException; +import java.util.List; +import java.util.SortedSet; + +/** + * This is the scanner for any *MemStore implementation, derived from MemStore. + * Currently, the scanner works with DefaultMemStore and CompactMemStore. + * The MemStoreScanner combines CellSetMgrScanners from different CellSetMgrs and + * uses the key-value heap and the reversed key-value heap for the aggregated key-values set. 
+ * It is assumed that only traversing forward or backward is used (without zigzagging in between) + */ +@InterfaceAudience.Private +public class MemStoreScanner extends NonLazyKeyValueScanner { + /** + * Types of cell MemStoreScanner + */ + static public enum Type { + UNDEFINED, + COMPACT_FORWARD, + USER_SCAN_FORWARD, + USER_SCAN_BACKWARD + } + + private KeyValueHeap forwardHeap; // heap of scanners used for traversing forward + private ReversedKeyValueHeap backwardHeap; // reversed scanners heap for traversing backward + + private Type type = Type.UNDEFINED; // The type of the scan is defined by constructor + // or according to the first usage + + private long readPoint; + List scanners; // remember the initial version of the scanners list + private AbstractMemStore // pointer back to the relevant MemStore + backwardReferenceToMemStore; // is needed for shouldSeek() method + + /** + * Constructor. + * If UNDEFINED type for MemStoreScanner is provided, the forward heap is used as default! + * After constructor only one heap is going to be initialized for entire lifespan + * of the MemStoreScanner. A specific scanner ca only be one directed! + * + * @param readPoint Read point below which we can safely remove duplicate KVs + * @param type The scan type COMPACT_FORWARD should be used for compaction + * @param ms Pointer back to the MemStore + */ + public MemStoreScanner(AbstractMemStore ms, long readPoint, Type type) throws IOException { + this(ms, ms.getListOfScanners(readPoint), readPoint, type); + } + + /* Constructor used only when the scan usage is unknown + and need to be defined according to the first move */ + public MemStoreScanner(AbstractMemStore ms, long readPt) throws IOException { + this(ms, readPt, Type.UNDEFINED); + } + + public MemStoreScanner(AbstractMemStore ms, List scanners, long readPoint, + Type type) throws IOException { + super(); + this.readPoint = readPoint; + this.type = type; + switch (type) { + case UNDEFINED: + case USER_SCAN_FORWARD: + case COMPACT_FORWARD: + this.forwardHeap = new KeyValueHeap(scanners, ms.getComparator()); + break; + case USER_SCAN_BACKWARD: + this.backwardHeap = new ReversedKeyValueHeap(scanners, ms.getComparator()); + break; + default: + throw new IOException("Unknown scanner type in MemStoreScanner"); + } + this.backwardReferenceToMemStore = ms; + this.scanners = scanners; + if (Trace.isTracing() && Trace.currentSpan() != null) { + Trace.currentSpan().addTimelineAnnotation("Creating MemStoreScanner"); + } + } + + /** + * Returns the cell from the top-most scanner without advancing the iterator. + * The backward traversal is assumed, only if specified explicitly + */ + @Override public synchronized Cell peek() { + if (type == Type.USER_SCAN_BACKWARD) return backwardHeap.peek(); + return forwardHeap.peek(); + } + + /** + * Gets the next cell from the top-most scanner. Assumed forward scanning. + */ + @Override public synchronized Cell next() throws IOException { + KeyValueHeap heap = (Type.USER_SCAN_BACKWARD == type) ? backwardHeap : forwardHeap; + + for (Cell currentCell = heap.next(); // loop over till the next suitable value + currentCell != null; // take next value from the forward heap + currentCell = heap.next()) { + + // all the logic of presenting cells is inside the internal MemStoreSegmentScanners + // located inside the heap + + return currentCell; + } + return null; + } + + /** + * Set the scanner at the seek key. Assumed forward scanning. 
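For forward traversal the scanner above pushes one scanner per segment into a key-value heap, so peek()/next() always surface the smallest cell across the active segment, the compaction pipeline and the snapshot. The same merge can be sketched over plain sorted iterators with a PriorityQueue; this is illustrative generic code, not the KeyValueHeap API.

import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

// Toy forward merge over several already-sorted segment iterators.
class ForwardMergeSketch<T> {
  private static final class Source<T> {
    T head;
    final Iterator<T> rest;
    Source(T head, Iterator<T> rest) { this.head = head; this.rest = rest; }
  }

  private final PriorityQueue<Source<T>> heap;

  ForwardMergeSketch(List<Iterator<T>> segments, Comparator<T> cmp) {
    heap = new PriorityQueue<>(Comparator.comparing((Source<T> s) -> s.head, cmp));
    for (Iterator<T> it : segments) {
      if (it.hasNext()) {
        heap.add(new Source<>(it.next(), it));
      }
    }
  }

  /** Smallest element across all segments, without advancing. */
  T peek() { return heap.isEmpty() ? null : heap.peek().head; }

  /** Smallest element across all segments, advancing the owning iterator. */
  T next() {
    Source<T> top = heap.poll();
    if (top == null) return null;
    T result = top.head;
    if (top.rest.hasNext()) {
      top.head = top.rest.next();
      heap.add(top);
    }
    return result;
  }
}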
+ * Must be called only once: there is no thread safety between the scanner + * and the memStore. + * + * @param cell seek value + * @return false if the key is null or if there is no data + */ + @Override public synchronized boolean seek(Cell cell) throws IOException { + assertForward(); + + if (cell == null) { + close(); + return false; + } + + return forwardHeap.seek(cell); + } + + /** + * Move forward on the sub-lists set previously by seek. Assumed forward scanning. + * + * @param cell seek value (should be non-null) + * @return true if there is at least one KV to read, false otherwise + */ + @Override public synchronized boolean reseek(Cell cell) throws IOException { + /* + * See HBASE-4195 & HBASE-3855 & HBASE-6591 for the background on this implementation. + * This code is executed concurrently with flush and puts, without locks. + * Two points must be known when working on this code: + * 1) It's not possible to use the 'kvTail' and 'snapshot' + * variables, as they are modified during a flush. + * 2) The ideal implementation for performance would use the sub skip list + * implicitly pointed by the iterators 'kvsetIt' and + * 'snapshotIt'. Unfortunately the Java API does not offer a method to + * get it. So we remember the last keys we iterated to and restore + * the reseeked set to at least that point. + * + * TODO: The above comment copied from the original MemStoreScanner + */ + assertForward(); + return forwardHeap.reseek(cell); + } + + /** + * MemStoreScanner returns max value as sequence id because it will + * always have the latest data among all files. + */ + @Override public synchronized long getSequenceID() { + return Long.MAX_VALUE; + } + + @Override public synchronized void close() { + + if (forwardHeap != null) { + assert ((type == Type.USER_SCAN_FORWARD) || + (type == Type.COMPACT_FORWARD) || (type == Type.UNDEFINED)); + forwardHeap.close(); + forwardHeap = null; + if (backwardHeap != null) { + backwardHeap.close(); + backwardHeap = null; + } + } else if (backwardHeap != null) { + assert (type == Type.USER_SCAN_BACKWARD); + backwardHeap.close(); + backwardHeap = null; + } + } + + /** + * Set the scanner at the seek key. Assumed backward scanning. + * + * @param cell seek value + * @return false if the key is null or if there is no data + */ + @Override public synchronized boolean backwardSeek(Cell cell) throws IOException { + initiBackwHeapIfNeeded(cell, false); + return backwardHeap.backwardSeek(cell); + } + + /** + * Assumed backward scanning. 
+ * + * @param cell seek value + * @return false if the key is null or if there is no data + */ + @Override public synchronized boolean seekToPreviousRow(Cell cell) throws IOException { + initiBackwHeapIfNeeded(cell, false); + if (backwardHeap.peek() == null) restartBackwHeap(cell); + return backwardHeap.seekToPreviousRow(cell); + } + + @Override public synchronized boolean seekToLastRow() throws IOException { + // TODO: it looks like this is how it should be, however ReversedKeyValueHeap class doesn't + // implement seekToLastRow() method :( + // however seekToLastRow() was implemented in internal MemStoreScanner + // so I wonder whether we need to come with our own workaround, or to update + // ReversedKeyValueHeap + return initiBackwHeapIfNeeded(KeyValue.LOWESTKEY, true); + //return backwardHeap.seekToLastRow(); + } + + /** + * Check if this memstore may contain the required keys + * + * @param scan + * @return False if the key definitely does not exist in this Memstore + */ + @Override public synchronized boolean shouldUseScanner(Scan scan, SortedSet columns, + long oldestUnexpiredTS) { + boolean result = false; + if (type == Type.COMPACT_FORWARD) return true; + + for (StoreSegmentScanner sc : scanners) { + result |= sc.shouldSeek(scan, oldestUnexpiredTS); + } + return result; + } + + // debug method + @Override + public String toString() { + String msg = ""; + int i = 1; + for (StoreSegmentScanner scanner : scanners) { + msg += "scanner (" + i + ") " + scanner.toString(); + i++; + } + return msg; + } + /****************** Private methods ******************/ + /** + * Restructure the ended backward heap after rerunning a seekToPreviousRow() + * on each scanner + */ + private boolean restartBackwHeap(Cell cell) throws IOException { + boolean res = false; + for (StoreSegmentScanner scan : scanners) + res |= scan.seekToPreviousRow(cell); + this.backwardHeap = + new ReversedKeyValueHeap(scanners, backwardReferenceToMemStore.getComparator()); + return res; + } + + /** + * Checks whether the type of the scan suits the assumption of moving forward + */ + private boolean initiBackwHeapIfNeeded(Cell cell, boolean toLast) throws IOException { + boolean res = false; + if (toLast && (type != Type.UNDEFINED)) + throw new IllegalStateException("Wrong usage of initiBackwHeapIfNeeded in parameters"); + if (type == Type.UNDEFINED) { + // In case we started from peek, release the forward heap + // and build backward. Set the correct type. 
Thus this turn + // can happen only once + if ((backwardHeap == null) && (forwardHeap != null)) { + forwardHeap.close(); + forwardHeap = null; + // before building the heap seek for the relevant key on the scanners, + // for the heap to be built from the scanners correctly + for (StoreSegmentScanner scan : scanners) + if (toLast) res |= scan.seekToLastRow(); + else res |= scan.backwardSeek(cell); + this.backwardHeap = + new ReversedKeyValueHeap(scanners, backwardReferenceToMemStore.getComparator()); + type = Type.USER_SCAN_BACKWARD; + } + } + + if (type == Type.USER_SCAN_FORWARD) + throw new IllegalStateException("Traversing backward with forward scan"); + return res; + } + + /** + * Checks whether the type of the scan suits the assumption of moving forward + */ + private void assertForward() throws IllegalStateException { + if (type == Type.UNDEFINED) type = Type.USER_SCAN_FORWARD; + + if (type == Type.USER_SCAN_BACKWARD) + throw new IllegalStateException("Traversing forward with backward scan"); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSnapshot.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSnapshot.java index be853c5..c70cc85 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSnapshot.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSnapshot.java @@ -34,14 +34,13 @@ public class MemStoreSnapshot { private final KeyValueScanner scanner; private final boolean tagsPresent; - public MemStoreSnapshot(long id, int cellsCount, long size, TimeRangeTracker timeRangeTracker, - KeyValueScanner scanner, boolean tagsPresent) { + public MemStoreSnapshot(long id, ImmutableSegment snapshot) { this.id = id; - this.cellsCount = cellsCount; - this.size = size; - this.timeRangeTracker = timeRangeTracker; - this.scanner = scanner; - this.tagsPresent = tagsPresent; + this.cellsCount = snapshot.getCellsCount(); + this.size = snapshot.getSize(); + this.timeRangeTracker = snapshot.getTimeRangeTracker(); + this.scanner = snapshot.getScannerForMemStoreSnapshot(); + this.tagsPresent = snapshot.isTagsPresent(); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableCellSetSegment.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableCellSetSegment.java new file mode 100644 index 0000000..11fdbdd --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableCellSetSegment.java @@ -0,0 +1,227 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; + +import java.util.Iterator; +import java.util.SortedSet; +import java.util.concurrent.atomic.AtomicLong; + +/** + * This mutable store segment encapsulates a mutable cell set and its respective memory allocation + * buffers (MSLAB). + */ +@InterfaceAudience.Private +final class MutableCellSetSegment extends MutableSegment { + + private volatile CellSet cellSet; + private volatile MemStoreLAB memStoreLAB; + private final CellComparator comparator; + private final AtomicLong size; + + // private c-tors. Instantiate objects only using factory + public MutableCellSetSegment(CellSet cellSet, MemStoreLAB memStoreLAB, long size, + CellComparator comparator) { + this.cellSet = cellSet; + this.memStoreLAB = memStoreLAB; + this.comparator = comparator; + this.size = new AtomicLong(size); + } + + @Override + public StoreSegmentScanner getScanner(long readPoint) { + return new MutableCellSetSegmentScanner(this, readPoint); + } + + @Override + public boolean isEmpty() { + return getCellSet().isEmpty(); + } + + @Override + public int getCellsCount() { + return getCellSet().size(); + } + + @Override + public long add(Cell cell) { + boolean succ = getCellSet().add(cell); + long s = AbstractMemStore.heapSizeChange(cell, succ); + updateMetaInfo(cell, s); + // In no tags case this NoTagsKeyValue.getTagsLength() is a cheap call. + // When we use ACL CP or Visibility CP which deals with Tags during + // mutation, the TagRewriteCell.getTagsLength() is a cheaper call. We do not + // parse the byte[] to identify the tags length. + if(cell.getTagsLength() > 0) { + tagsPresent = true; + } + return s; + } + + @Override + public long rollback(Cell cell) { + Cell found = get(cell); + if (found != null && found.getSequenceId() == cell.getSequenceId()) { + long sz = AbstractMemStore.heapSizeChange(cell, true); + remove(cell); + size.addAndGet(-sz); + return sz; + } + return 0; + } + + @Override + public Cell getFirstAfter(Cell cell) { + SortedSet snTailSet = tailSet(cell); + if (!snTailSet.isEmpty()) { + return snTailSet.first(); + } + return null; + } + + @Override + public void close() { + MemStoreLAB mslab = getMemStoreLAB(); + if(mslab != null) { + mslab.close(); + } + // do not set MSLab to null as scanners may still be reading the data here and need to decrease + // the counter when they finish + } + + @Override + public Cell maybeCloneWithAllocator(Cell cell) { + if (getMemStoreLAB() == null) { + return cell; + } + + int len = KeyValueUtil.length(cell); + ByteRange alloc = getMemStoreLAB().allocateBytes(len); + if (alloc == null) { + // The allocation was too large, allocator decided + // not to do anything with it. 
+ return cell; + } + assert alloc.getBytes() != null; + KeyValueUtil.appendToByteArray(cell, alloc.getBytes(), alloc.getOffset()); + KeyValue newKv = new KeyValue(alloc.getBytes(), alloc.getOffset(), len); + newKv.setSequenceId(cell.getSequenceId()); + return newKv; + } + + @Override + public StoreSegment setSize(long size) { + this.size.set(size); + return this; + } + + @Override + public long getSize() { + return size.get(); + } + + @Override + public void dump(Log log) { + for (Cell cell: getCellSet()) { + log.debug(cell); + } + } + + //**** Public methods for mutable memstore segment + @Override + public SortedSet tailSet(Cell firstCell) { + return getCellSet().tailSet(firstCell); + } + @Override + public void incSize(long delta) { + size.addAndGet(delta); + } + @Override + public CellSet getCellSet() { + return cellSet; + } + @Override + public CellComparator getComparator() { + return comparator; + } + + //*** Methods for MemStoreSegmentsScanner + public Cell last() { + return getCellSet().last(); + } + + public Iterator iterator() { + return getCellSet().iterator(); + } + + public SortedSet headSet(KeyValue firstKeyOnRow) { + return getCellSet().headSet(firstKeyOnRow); + } + + public void incScannerCount() { + if(getMemStoreLAB() != null) { + getMemStoreLAB().incScannerCount(); + } + } + + public void decScannerCount() { + if(getMemStoreLAB() != null) { + getMemStoreLAB().decScannerCount(); + } + } + + public int compare(Cell left, Cell right) { + return getComparator().compare(left, right); + } + + public int compareRows(Cell left, Cell right) { + return getComparator().compareRows(left, right); + } + + private Cell get(Cell cell) { + return getCellSet().get(cell); + } + + private boolean remove(Cell e) { + return getCellSet().remove(e); + } + + private void updateMetaInfo(Cell toAdd, long s) { + getTimeRangeTracker().includeTimestamp(toAdd); + size.addAndGet(s); + } + + private MemStoreLAB getMemStoreLAB() { + return memStoreLAB; + } + + // methods for tests + @Override + Cell first() { + return this.getCellSet().first(); + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableCellSetSegmentScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableCellSetSegmentScanner.java new file mode 100644 index 0000000..4a6377a --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableCellSetSegmentScanner.java @@ -0,0 +1,433 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; + +import java.io.IOException; +import java.util.Iterator; +import java.util.SortedSet; + +/** + * A scanner of a single cells segment {@link MutableCellSetSegment}. + */ +@InterfaceAudience.Private +class MutableCellSetSegmentScanner implements StoreSegmentScanner { + + private final MutableCellSetSegment segment; // the observed structure + private long readPoint; // the highest relevant MVCC + private Iterator iter; // the current iterator that can be reinitialized by + // seek(), backwardSeek(), or reseek() + private Cell current = null; // the pre-calculated cell to be returned by peek() + // or next() + // A flag represents whether could stop skipping KeyValues for MVCC + // if have encountered the next row. Only used for reversed scan + private boolean stopSkippingKVsIfNextRow = false; + // last iterated KVs by seek (to restore the iterator state after reseek) + private Cell last = null; + private long sequenceID = Long.MAX_VALUE; + + /** + * --------------------------------------------------------- + * C-tor + */ + public MutableCellSetSegmentScanner(MutableCellSetSegment segment, long readPoint) { + super(); + this.segment = segment; + this.readPoint = readPoint; + iter = segment.iterator(); + // the initialization of the current is required for working with heap of SegmentScanners + current = getNext(); + //increase the reference count so the underlying structure will not be de-allocated + this.segment.incScannerCount(); + } + + + /** + * --------------------------------------------------------- + * Look at the next Cell in this scanner, but do not iterate the scanner + * + * @return the currently observed Cell + */ + @Override + public Cell peek() { // sanity check, the current should be always valid + if (current!=null && current.getSequenceId() > readPoint) { + assert (false); // sanity check, the current should be always valid + } + + return current; + } + + + /** + * --------------------------------------------------------- + * Return the next Cell in this scanner, iterating the scanner + * + * @return the next Cell or null if end of scanner + */ + @Override + public Cell next() throws IOException { + Cell oldCurrent = current; + current = getNext(); // update the currently observed Cell + return oldCurrent; + } + + + /** + * --------------------------------------------------------- + * Seek the scanner at or after the specified KeyValue. + * + * @param cell seek value + * @return true if scanner has values left, false if end of scanner + */ + @Override + public boolean seek(Cell cell) throws IOException { + // restart the iterator from new key + iter = segment.tailSet(cell).iterator(); + last = null; // last is going to be reinitialized in the next getNext() call + current = getNext(); + return (current != null); + } + + + /** + * --------------------------------------------------------- + * Reseek the scanner at or after the specified KeyValue. + * This method is guaranteed to seek at or after the required key only if the + * key comes after the current position of the scanner. Should not be used + * to seek to a key which may come before the current position. 
+ * + * @param cell seek value (should be non-null) + * @return true if scanner has values left, false if end of scanner + */ + @Override + public boolean reseek(Cell cell) throws IOException { + + /* + * The ideal implementation for performance would use the sub skip list implicitly + * pointed by the iterator. Unfortunately the Java API does not offer a method to + * get it. So we remember the last keys we iterated to and restore + * the reseeked set to at least that point. + */ + iter = segment.tailSet(getHighest(cell, last)).iterator(); + current = getNext(); + return (current != null); + } + + + /** + * --------------------------------------------------------- + * Get the sequence id associated with this KeyValueScanner. This is required + * for comparing multiple files (or memstore segments) scanners to find out + * which one has the latest data. + * + */ + @Override + public long getSequenceID() { + return sequenceID; + } + + @Override + public void setSequenceID(long x) { + sequenceID = x; + } + + + /** + * --------------------------------------------------------- + * Close the KeyValue scanner. + */ + @Override + public void close() { + this.segment.decScannerCount(); + } + + + /** + * --------------------------------------------------------- + * Allows to filter out scanners (both StoreFile and memstore) that we don't + * want to use based on criteria such as Bloom filters and timestamp ranges. + * + * @param scan the scan that we are selecting scanners for + * @param columns the set of columns in the current column family, or null if + * not specified by the scan + * @param oldestUnexpiredTS the oldest timestamp we are interested in for + * this query, based on TTL + * @return true if the scanner should be included in the query + *

+ * This functionality should be resolved in the higher level which is + * MemStoreScanner, currently returns true as default. Doesn't throw + * IllegalStateException in order not to change the signature of the + * overridden method + */ + @Override + public boolean shouldUseScanner(Scan scan, SortedSet columns, + long oldestUnexpiredTS) { + return true; + } + + + /** + * --------------------------------------------------------- + * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only + * does a seek operation after checking that it is really necessary for the + * row/column combination specified by the kv parameter. This function was + * added to avoid unnecessary disk seeks by checking row-column Bloom filters + * before a seek on multi-column get/scan queries, and to optimize by looking + * up more recent files first. + *

+ * This scanner is working solely on the in-memory MemStore therefore this + * interface is not relevant. + * + * @param c + * @param forward do a forward-only "reseek" instead of a random-access seek + * @param useBloom whether to enable multi-column Bloom filter optimization + */ + @Override + public boolean requestSeek(Cell c, boolean forward, boolean useBloom) + throws IOException { + + throw new IllegalStateException( + "requestSeek cannot be called on MutableCellSetSegmentScanner"); + } + + + /** + * --------------------------------------------------------- + * We optimize our store scanners by checking the most recent store file + * first, so we sometimes pretend we have done a seek but delay it until the + * store scanner bubbles up to the top of the key-value heap. This method is + * then used to ensure the top store file scanner has done a seek operation. + *

+ * This scanner is working solely on the in-memory MemStore and doesn't work on + * store files, MutableCellSetSegmentScanner always does the seek, + * therefore always returning true. + */ + @Override + public boolean realSeekDone() { + return true; + } + + + /** + * --------------------------------------------------------- + * Does the real seek operation in case it was skipped by + * seekToRowCol(KeyValue, boolean). Note that this function should + * be never called on scanners that always do real seek operations (i.e. most + * of the scanners and also this one). The easiest way to achieve this is to call + * {@link #realSeekDone()} first. + */ + @Override + public void enforceSeek() throws IOException { + throw new IllegalStateException( + "enforceSeek cannot be called on MutableCellSetSegmentScanner"); + } + + + /** + * --------------------------------------------------------- + * + * @return true if this is a file scanner. Otherwise a memory scanner is + * assumed. + */ + @Override + public boolean isFileScanner() { + return false; + } + + + /** + * --------------------------------------------------------- + * Seek the scanner at or before the row of specified Cell, it firstly + * tries to seek the scanner at or after the specified Cell, return if + * peek KeyValue of scanner has the same row with specified Cell, + * otherwise seek the scanner at the first Cell of the row which is the + * previous row of specified KeyValue + * + * @param key seek KeyValue + * @return true if the scanner is at the valid KeyValue, false if such + * KeyValue does not exist + */ + @Override + public boolean backwardSeek(Cell key) throws IOException { + seek(key); // seek forward then go backward + if (peek() == null || segment.compareRows(peek(), key) > 0) { + return seekToPreviousRow(key); + } + return true; + } + + + /** + * --------------------------------------------------------- + * Seek the scanner at the first Cell of the row which is the previous row + * of specified key + * + * @param cell seek value + * @return true if the scanner at the first valid Cell of previous row, + * false if not existing such Cell + */ + @Override + public boolean seekToPreviousRow(Cell cell) throws IOException { + + KeyValue firstKeyOnRow = // find a previous cell + KeyValueUtil.createFirstOnRow(cell.getRowArray(), + cell.getRowOffset(), cell.getRowLength()); + SortedSet cellHead = // here the search is hidden, reset the iterator + segment.headSet(firstKeyOnRow); + Cell lastCellBeforeRow = cellHead.isEmpty() ? 
null : cellHead.last(); + + if (lastCellBeforeRow == null) { // end of recursion + current = null; + return false; + } + + KeyValue firstKeyOnPreviousRow = // find a previous row + KeyValueUtil.createFirstOnRow(lastCellBeforeRow.getRowArray(), + lastCellBeforeRow.getRowOffset(), lastCellBeforeRow.getRowLength()); + + stopSkippingKVsIfNextRow = true; + // seek in order to update the iterator and current + seek(firstKeyOnPreviousRow); + stopSkippingKVsIfNextRow = false; + + // if nothing found or we searched beyond the needed, take one more step backward + if (peek() == null || segment.compareRows(peek(), firstKeyOnPreviousRow) > 0) { + return seekToPreviousRow(lastCellBeforeRow); + } + return true; + } + + + /** + * --------------------------------------------------------- + * Seek the scanner at the first KeyValue of last row + * + * @return true if scanner has values left, false if the underlying data is empty + * @throws java.io.IOException + */ + @Override + public boolean seekToLastRow() throws IOException { + Cell higherCell = segment.isEmpty() ? null : segment.last(); + if (higherCell == null) { + return false; + } + + KeyValue firstCellOnLastRow = KeyValueUtil.createFirstOnRow(higherCell.getRowArray(), + higherCell.getRowOffset(), higherCell.getRowLength()); + + if (seek(firstCellOnLastRow)) { + return true; + } else { + return seekToPreviousRow(higherCell); + } + } + + + /** + * --------------------------------------------------------- + * + * @return the next key in the index (the key to seek to the next block) + * if known, or null otherwise + *

+ * Not relevant for in-memory scanner + */ + @Override + public Cell getNextIndexedKey() { + return null; + } + + /** + * Called after a batch of rows scanned (RPC) and set to be returned to client. Any in between + * cleanup can be done here. Nothing to be done for MutableCellSetSegmentScanner. + */ + @Override + public void shipped() throws IOException { + // do nothing + } + + @Override + public boolean shouldSeek(Scan scan, long oldestUnexpiredTS) { + return segment.shouldSeek(scan,oldestUnexpiredTS); + } + + //debug method + @Override + public String toString() { + String res = "Store segment scanner of type "+this.getClass().getName()+"; "; + res += "sequence id "+getSequenceID()+"; "; + res += segment.toString(); + return res; + } + +/********************* Private Methods **********************/ + + /** + * --------------------------------------------------------- + * Private internal method for iterating over the segment, + * skipping the cells with irrelevant MVCC + */ + private Cell getNext() { + Cell startKV = current; + Cell next = null; + + try { + while (iter.hasNext()) { + next = iter.next(); + if (next.getSequenceId() <= this.readPoint) { + return next; // skip irrelevant versions + } + if (stopSkippingKVsIfNextRow && // for backwardSeek() stay in the + startKV != null && // boundaries of a single row + segment.compareRows(next, startKV) > 0) { + return null; + } + } // end of while + + return null; // nothing found + } finally { + if (next != null) { + // in all cases, remember the last KV we iterated to, needed for reseek() + last = next; + } + } + } + + + /** + * --------------------------------------------------------- + * Private internal method that returns the higher of the two key values, or null + * if they are both null + */ + private Cell getHighest(Cell first, Cell second) { + if (first == null && second == null) { + return null; + } + if (first != null && second != null) { + int compare = segment.compare(first, second); + return (compare > 0 ? first : second); + } + return (first != null ? first : second); + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableSegment.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableSegment.java new file mode 100644 index 0000000..77b4a06 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MutableSegment.java @@ -0,0 +1,57 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; + +import java.util.SortedSet; + +/** + * An abstraction of a mutable segment in memstore, specifically the active segment. 
+ */ +public abstract class MutableSegment extends StoreSegment { + + /** + * Returns a subset of the segment cell set, which starts with the given cell + * @param firstCell a cell in the segment + * @return a subset of the segment cell set, which starts with the given cell + */ + public abstract SortedSet tailSet(Cell firstCell); + + /** + * Increases the heap size counter of the segment by the given delta + * @param delta + */ + public abstract void incSize(long delta); + + /** + * Returns the Cell comparator used by this segment + * @return the Cell comparator used by this segment + */ + public abstract CellComparator getComparator(); + + //methods for test + + /** + * Returns the first cell in the segment + * @return the first cell in the segment + */ + abstract Cell first(); +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 3c0f50a..5b42608 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -18,53 +18,16 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.net.BindException; -import java.net.InetSocketAddress; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeSet; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; - +import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellScannable; -import org.apache.hadoop.hbase.CellScanner; -import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.DoNotRetryIOException; -import org.apache.hadoop.hbase.DroppedSnapshotException; -import org.apache.hadoop.hbase.HBaseIOException; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.NotServingRegionException; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.UnknownScannerException; +import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.client.Append; -import org.apache.hadoop.hbase.client.ConnectionUtils; -import org.apache.hadoop.hbase.client.Delete; -import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.client.Get; -import org.apache.hadoop.hbase.client.Increment; -import org.apache.hadoop.hbase.client.Mutation; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException; import 
org.apache.hadoop.hbase.exceptions.MergeRegionException; @@ -72,78 +35,21 @@ import org.apache.hadoop.hbase.exceptions.OperationConflictException; import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException; import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; -import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler; -import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController; -import org.apache.hadoop.hbase.ipc.PriorityFunction; -import org.apache.hadoop.hbase.ipc.QosPriority; -import org.apache.hadoop.hbase.ipc.RpcCallContext; +import org.apache.hadoop.hbase.ipc.*; import org.apache.hadoop.hbase.ipc.RpcCallback; -import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.ipc.RpcServer.BlockingServiceAndInterface; -import org.apache.hadoop.hbase.ipc.RpcServerInterface; -import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; -import org.apache.hadoop.hbase.ipc.ServerRpcController; import org.apache.hadoop.hbase.master.MasterRpcServices; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.RequestConverter; import org.apache.hadoop.hbase.protobuf.ResponseConverter; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CompactRegionRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CompactRegionResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.FlushRegionRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.FlushRegionResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetOnlineRegionRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetOnlineRegionResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetServerInfoRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetServerInfoResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetStoreFileRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetStoreFileResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.MergeRegionsRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.MergeRegionsResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest; +import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.*; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest.RegionOpenInfo; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse.RegionOpeningState; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ReplicateWALEntryRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ReplicateWALEntryResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.RollWALWriterRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.RollWALWriterResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.SplitRegionRequest; -import 
org.apache.hadoop.hbase.protobuf.generated.AdminProtos.SplitRegionResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.StopServerRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.StopServerResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateConfigurationRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateConfigurationResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateFavoredNodesRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateFavoredNodesResponse; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WALEntry; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WarmupRegionRequest; -import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WarmupRegionResponse; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest; +import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.*; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest.FamilyPath; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileResponse; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Condition; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceRequest; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceResponse; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MultiRequest; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MultiResponse; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutateRequest; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutateResponse; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.RegionAction; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.RegionActionResult; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ResultOrException; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest; -import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameInt64Pair; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier; @@ -166,12 +72,7 @@ import org.apache.hadoop.hbase.regionserver.handler.OpenMetaHandler; import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler; import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.apache.hadoop.hbase.security.User; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.Counter; -import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; -import org.apache.hadoop.hbase.util.Pair; -import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; -import org.apache.hadoop.hbase.util.Strings; +import org.apache.hadoop.hbase.util.*; import org.apache.hadoop.hbase.wal.WAL; import org.apache.hadoop.hbase.wal.WALKey; import 
org.apache.hadoop.hbase.wal.WALSplitter; @@ -179,12 +80,15 @@ import org.apache.hadoop.hbase.zookeeper.ZKSplitLog; import org.apache.hadoop.net.DNS; import org.apache.zookeeper.KeeperException; -import com.google.common.annotations.VisibleForTesting; -import com.google.protobuf.ByteString; -import com.google.protobuf.Message; -import com.google.protobuf.RpcController; -import com.google.protobuf.ServiceException; -import com.google.protobuf.TextFormat; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.BindException; +import java.net.InetSocketAddress; +import java.net.UnknownHostException; +import java.util.*; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; /** * Implements the regionserver RPC services. @@ -551,7 +455,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler, throw new DoNotRetryIOException("Atomic put and/or delete only, not " + type.name()); } } - return region.checkAndRowMutate(row, family, qualifier, compareOp, comparator, rm, Boolean.TRUE); + return region.checkAndRowMutate(row, family, qualifier, compareOp, comparator, rm, + Boolean.TRUE); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java index 6d87057..b3d0272 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java @@ -190,6 +190,9 @@ public interface Region extends ConfigurationObserver { /** @return memstore size for this region, in bytes */ long getMemstoreSize(); + /** @return total memstore size, including additional, like compactoin pipelines */ + public long getMemstoreTotalSize(); + /** @return the number of mutations processed bypassing the WAL */ long getNumMutationsWithoutWAL(); @@ -636,14 +639,38 @@ public interface Region extends ConfigurationObserver { * *

This method may block for some time, so it should not be called from a * time-sensitive thread. - * @param force whether we want to force a flush of all stores + * @param forceFlushAllStores whether we want to force a flush of all stores + * @return FlushResult indicating whether the flush was successful or not and if + * the region needs compacting + * + * @throws IOException general io exceptions + * because a snapshot was not properly persisted. + */ + FlushResult flush(boolean forceFlushAllStores) throws IOException; + + /** + * Flush the cache. + * + *

When this method is called the cache will be flushed unless:
+   * <ol>
+   *   <li>the cache is empty</li>
+   *   <li>the region is closed.</li>
+   *   <li>a flush is already in progress</li>
+   *   <li>writes are disabled</li>
+   * </ol>
+   *
+   * <p>

This method may block for some time, so it should not be called from a + * time-sensitive thread. + * @param forceFlushAllStores whether we want to force a flush of all stores + * @param forceFlushInsteadOfCompaction whether to flush the compacting memstores as well * @return FlushResult indicating whether the flush was successful or not and if * the region needs compacting * * @throws IOException general io exceptions + * @throws DroppedSnapshotException Thrown when abort is required * because a snapshot was not properly persisted. */ - FlushResult flush(boolean force) throws IOException; + public FlushResult flush(boolean forceFlushAllStores, boolean forceFlushInsteadOfCompaction) throws IOException; /** * Synchronously compact all stores in the region. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAccounting.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAccounting.java index 879b573..303290a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAccounting.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAccounting.java @@ -18,13 +18,13 @@ */ package org.apache.hadoop.hbase.regionserver; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.Bytes; + import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicLong; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.Bytes; - /** * RegionServerAccounting keeps record of some basic real time information about * the Region Server. Currently, it only keeps record the global memstore size. @@ -33,7 +33,8 @@ import org.apache.hadoop.hbase.util.Bytes; public class RegionServerAccounting { private final AtomicLong atomicGlobalMemstoreSize = new AtomicLong(0); - + private final AtomicLong atomicGlobalMemstorAdditionaleSize = new AtomicLong(0); + // Store the edits size during replaying WAL. Use this to roll back the // global memstore size once a region opening failed. private final ConcurrentMap replayEditsPerRegion = @@ -54,7 +55,11 @@ public class RegionServerAccounting { public long addAndGetGlobalMemstoreSize(long memStoreSize) { return atomicGlobalMemstoreSize.addAndGet(memStoreSize); } - + + public long addAndGetGlobalMemstoreAdditionalSize(long size) { + return atomicGlobalMemstorAdditionaleSize.addAndGet(size); + } + /*** * Add memStoreSize to replayEditsPerRegion. 
* diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java index 8d35a7d..9eeadbc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -17,13 +17,6 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.NavigableSet; - -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; @@ -32,6 +25,8 @@ import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.conf.PropagatingConfigurationObserver; import org.apache.hadoop.hbase.io.HeapSize; @@ -45,6 +40,11 @@ import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController; import org.apache.hadoop.hbase.util.Pair; +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.NavigableSet; + /** * Interface for objects that hold a column family in a Region. Its a memstore and a set of zero or * more StoreFiles, which stretch backwards over time. @@ -441,6 +441,19 @@ public interface Store extends HeapSize, StoreConfigInformation, PropagatingConf */ double getCompactionPressure(); + // turn on the force flush flag to make sure data is flushed to disk + void setForceFlushToDisk(); + // check whether memstore compaction is in progress + boolean isMemStoreInCompaction(); + // check whether memstore can be compacted + boolean isCompactedMemStore(); + // check whether can flush in memory + boolean shouldFlushInMemory(); + // flush memstore into an in-memory compacted segment + void flushInMemory(long flushOpSeqId); + // update wal with a new sequence id + void updateLowestUnflushedSequenceIdInWal(); + /** * Replaces the store files that the store has with the given files. Mainly used by * secondary region replicas to keep up to date with diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFlushContext.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFlushContext.java index 34ba1fa..f433334 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFlushContext.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFlushContext.java @@ -18,13 +18,13 @@ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import java.io.IOException; +import java.util.List; + /** * A package protected interface for a store flushing. * A store flush context carries the state required to prepare/flush/commit the store's cache. 
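As a minimal sketch (not code from this patch), assuming a store backed by the compacted memstore, a caller on the flush path might drive the hooks declared on Store above roughly like this; the method name and its parameters are illustrative only:

  // Illustrative only: exercising the hooks added to the Store interface.
  void flushStoreIfNeeded(Store store, long flushOpSeqId, boolean forceToDisk) {
    if (forceToDisk) {
      store.setForceFlushToDisk();                  // the next flush must reach disk
    }
    if (store.isCompactedMemStore()                 // store keeps a compaction pipeline
        && store.shouldFlushInMemory()              // active segment is large enough
        && !store.isMemStoreInCompaction()) {       // no in-memory compaction running
      store.flushInMemory(flushOpSeqId);            // push active segment into the pipeline
      store.updateLowestUnflushedSequenceIdInWal(); // let the WAL accounting advance
    }
  }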
@@ -38,8 +38,9 @@ interface StoreFlushContext { * Requires pausing writes. * * A very short operation. + * @param flushOpSeqId */ - void prepare(); + void prepareFlushToDisk(long flushOpSeqId); /** * Flush the cache (create the new store file) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegment.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegment.java new file mode 100644 index 0000000..b13739f --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegment.java @@ -0,0 +1,154 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.client.Scan; + +/** + * This is an abstraction of a segment maintained in a memstore, e.g., the active + * cell set or its snapshot. + * + * This abstraction facilitates the management of the compaction pipeline and the shifts of these + * segments from active set to snapshot set in the default implementation. 
+ */ +public abstract class StoreSegment { + + private final TimeRangeTracker timeRangeTracker; + protected volatile boolean tagsPresent; + + protected StoreSegment() { + this.timeRangeTracker = new TimeRangeTracker(); + this.tagsPresent = false; + } + + protected StoreSegment(StoreSegment segment) { + this.timeRangeTracker = segment.getTimeRangeTracker(); + this.tagsPresent = segment.isTagsPresent(); + } + + public boolean shouldSeek(Scan scan, long oldestUnexpiredTS) { + return (getTimeRangeTracker().includesTimeRange(scan.getTimeRange()) + && (getTimeRangeTracker().getMaximumTimestamp() >= + oldestUnexpiredTS)); + } + + public long getMinTimestamp() { + return getTimeRangeTracker().getMinimumTimestamp(); + } + + public boolean isTagsPresent() { + return tagsPresent; + } + + /** + * Creates the scanner that is able to scan the concrete segment + * @param readPoint + * @return a scanner for the given read point + */ + public abstract StoreSegmentScanner getScanner(long readPoint); + + /** + * Returns whether the segment has any cells + * @return whether the segment has any cells + */ + public abstract boolean isEmpty(); + + /** + * Returns number of cells in segment + * @return number of cells in segment + */ + public abstract int getCellsCount(); + + /** + * Adds the given cell into the segment + * @param cell + * @return the change in the heap size + */ + public abstract long add(Cell cell); + + /** + * Removes the given cell from the segment + * @param cell + * @return the change in the heap size + */ + public abstract long rollback(Cell cell); + + /** + * Returns the first cell in the segment that has equal or greater key than the given cell + * @param cell + * @return the first cell in the segment that has equal or greater key than the given cell + */ + public abstract Cell getFirstAfter(Cell cell); + + /** + * Closing a segment before it is being discarded + */ + public abstract void close(); + + /** + * If the segment has a memory allocator the cell is being cloned to this space, and returned; + * otherwise the given cell is returned + * @param cell + * @return either the given cell or its clone + */ + public abstract Cell maybeCloneWithAllocator(Cell cell); + + /** + * Setting the heap size of the segment - used to account for different class overheads + * @param size + * @return this object + */ + public abstract StoreSegment setSize(long size); + + /** + * Returns the heap size of the segment + * @return the heap size of the segment + */ + public abstract long getSize(); + + /** + * Returns a set of all cells in the segment + * @return a set of all cells in the segment + */ + public abstract CellSet getCellSet(); + + // Debug methods + /** + * Dumps all cells of the segment into the given log + * @param log + */ + public abstract void dump(Log log); + + @Override + public String toString() { + String res = "Store segment of type "+this.getClass().getName()+"; "; + res += "isEmpty "+(isEmpty()?"yes":"no")+"; "; + res += "cellCount "+getCellsCount()+"; "; + res += "size "+getSize()+"; "; + res += "Min ts "+getMinTimestamp()+"; "; + return res; + } + + protected TimeRangeTracker getTimeRangeTracker() { + return timeRangeTracker; + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegmentFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegmentFactory.java new file mode 100644 index 0000000..8a31cba --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegmentFactory.java @@ 
-0,0 +1,86 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.util.ReflectionUtils; + +/** + * A singleton store segment factory. + * Generate concrete store segments. + */ +public final class StoreSegmentFactory { + + static final String USEMSLAB_KEY = "hbase.hregion.memstore.mslab.enabled"; + static final boolean USEMSLAB_DEFAULT = true; + static final String MSLAB_CLASS_NAME = "hbase.regionserver.mslab.class"; + + private StoreSegmentFactory() {} + private static StoreSegmentFactory instance = new StoreSegmentFactory(); + public static StoreSegmentFactory instance() { return instance; } + + public ImmutableSegment createImmutableSegment(final Configuration conf, + final CellComparator comparator, long size) { + MemStoreLAB memStoreLAB = getMemStoreLAB(conf); + MutableSegment segment = generateMutableSegment(conf, comparator, memStoreLAB, size); + return createImmutableSegment(conf, segment); + } + + public ImmutableSegment createImmutableSegment(CellComparator comparator, + long size) { + MutableSegment segment = generateMutableSegment(null, comparator, null, size); + return createImmutableSegment(null, segment); + } + + public ImmutableSegment createImmutableSegment(final Configuration conf, MutableSegment segment) { + return generateImmutableSegment(conf, segment); + } + public MutableSegment createMutableSegment(final Configuration conf, + CellComparator comparator, long size) { + MemStoreLAB memStoreLAB = getMemStoreLAB(conf); + return generateMutableSegment(conf, comparator, memStoreLAB, size); + } + + //****** private methods to instantiate concrete store segments **********// + + private ImmutableSegment generateImmutableSegment(final Configuration conf, + MutableSegment segment) { + // TBD use configuration to set type of segment + return new ImmutableSegmentAdapter(segment); + } + private MutableSegment generateMutableSegment( + final Configuration conf, CellComparator comparator, MemStoreLAB memStoreLAB, long size) { + // TBD use configuration to set type of segment + CellSet set = new CellSet(comparator); + return new MutableCellSetSegment(set, memStoreLAB, size, comparator); + } + + private MemStoreLAB getMemStoreLAB(Configuration conf) { + MemStoreLAB memStoreLAB = null; + if (conf.getBoolean(USEMSLAB_KEY, USEMSLAB_DEFAULT)) { + String className = conf.get(MSLAB_CLASS_NAME, HeapMemStoreLAB.class.getName()); + memStoreLAB = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class }, new Object[] { conf }); + //memStoreLAB = new MemStoreLAB(conf, MemStoreChunkPool.getPool(conf)); + } + return memStoreLAB; + } + +} 
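A minimal usage sketch for this factory (not part of the patch; the Configuration object and the CellComparator constant are assumed to be available to the caller):

  // Sketch only: obtain the active segment from the singleton factory, then wrap it
  // as an immutable segment when it leaves the active set.
  Configuration conf = HBaseConfiguration.create();
  conf.setBoolean("hbase.hregion.memstore.mslab.enabled", true); // USEMSLAB_KEY
  StoreSegmentFactory factory = StoreSegmentFactory.instance();
  MutableSegment active =
      factory.createMutableSegment(conf, CellComparator.COMPARATOR, 0 /* initial size */);
  ImmutableSegment frozen = factory.createImmutableSegment(conf, active); // wraps, no copy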
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegmentScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegmentScanner.java new file mode 100644 index 0000000..0c93ef8 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreSegmentScanner.java @@ -0,0 +1,44 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.client.Scan; + +/** + * An interface for store segment scanner, both for memory segment (memstore segment) and disk + * segment (file). + */ +public interface StoreSegmentScanner extends KeyValueScanner { + + /** + * Set the sequence id of the scanner. + * This is used to determine an order between memory segment scanners. + * @param x a unique sequence id + */ + public void setSequenceID(long x); + + /** + * Returns whether the given scan should seek in this segment + * @param scan + * @param oldestUnexpiredTS + * @return whether the given scan should seek in this segment + */ + public boolean shouldSeek(Scan scan, long oldestUnexpiredTS); + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/VersionedSegmentsList.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/VersionedSegmentsList.java new file mode 100644 index 0000000..67a690e --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/VersionedSegmentsList.java @@ -0,0 +1,54 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +import java.util.LinkedList; + +/** + * A list of segment managers coupled with the version of the memstore (version at the time it was + * created). + * This structure helps to guarantee that the compaction pipeline updates after the compaction is + * updated in a consistent (atomic) way. 
+ * Specifically, swapping some of the elements in a compaction pipeline with a new compacted + * element is permitted only if the pipeline version is the same as the version attached to the + * elements. + * + */ +@InterfaceAudience.Private +public class VersionedSegmentsList { + + private final LinkedList storeSegments; + private final long version; + + public VersionedSegmentsList( + LinkedList storeSegments, long version) { + this.storeSegments = storeSegments; + this.version = version; + } + + public LinkedList getStoreSegments() { + return storeSegments; + } + + public long getVersion() { + return version; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java index 0e4a585..cf13f2b1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java @@ -1087,6 +1087,20 @@ public class FSHLog implements WAL { } /** + * updates the seuence number of a specific store. + * depending on the flag: replaces current seq number if the given seq id is bigger, + * or even if it is lower than existing one + * @param encodedRegionName + * @param familyName + * @param sequenceid + * @param onlyIfGreater + */ + @Override public void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid, + boolean onlyIfGreater) { + sequenceIdAccounting.updateStore(encodedRegionName,familyName,sequenceid,onlyIfGreater); + } + + /** * Thread to runs the hdfs sync call. This call takes a while to complete. This is the longest * pole adding edits to the WAL and this must complete to be sure all edits persisted. We run * multiple threads sync'ng rather than one that just syncs in series so we have better diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java index 6e10f3c..2617722 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java @@ -17,6 +17,12 @@ */ package org.apache.hadoop.hbase.regionserver.wal; +import com.google.common.collect.Maps; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.util.Bytes; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -27,13 +33,6 @@ import java.util.TreeMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.util.Bytes; - -import com.google.common.collect.Maps; - /** * Accounting of sequence ids per region and then by column family. 
So we can our accounting * current, call startCacheFlush and then finishedCacheFlush or abortCacheFlush so this instance @@ -163,6 +162,35 @@ class SequenceIdAccounting { } } + void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceId, + boolean onlyIfGreater) { + if(sequenceId == null) return; + Long highest = this.highestSequenceIds.get(encodedRegionName); + if(sequenceId > highest) { + this.highestSequenceIds.put(encodedRegionName,sequenceId); + } + synchronized (this.tieLock) { + ConcurrentMap m = getOrCreateLowestSequenceIds(encodedRegionName); + boolean replaced = false; + while (!replaced) { + Long l = m.get(familyName); + if (l == null) { + m.put(familyName, sequenceId); + replaced = true; + } else if (onlyIfGreater) { + if (sequenceId > l) { + replaced = m.replace(familyName, l, sequenceId); + } else { + return; + } + } else { // replace even if sequence id is not greater than l + m.put(familyName, sequenceId); + return; + } + } + } + } + ConcurrentMap getOrCreateLowestSequenceIds(byte[] encodedRegionName) { // Intentionally, this access is done outside of this.regionSequenceIdLock. Done per append. ConcurrentMap m = this.lowestUnflushedSequenceIds.get(encodedRegionName); @@ -360,4 +388,4 @@ class SequenceIdAccounting { } return toFlush == null? null: toFlush.toArray(new byte[][] { HConstants.EMPTY_BYTE_ARRAY }); } -} \ No newline at end of file +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DisabledWALProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DisabledWALProvider.java index 191d546..68bf9ea 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DisabledWALProvider.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DisabledWALProvider.java @@ -17,16 +17,8 @@ */ package org.apache.hadoop.hbase.wal; -import java.io.IOException; -import java.util.List; -import java.util.Set; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; @@ -34,12 +26,20 @@ import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.util.FSUtils; - -// imports for things that haven't moved from regionserver.wal yet. +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost; import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.hadoop.hbase.util.FSUtils; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +// imports for things that haven't moved from regionserver.wal yet. /** * No-op implementation of {@link WALProvider} used when the WAL is disabled. 
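As a rough usage sketch (not from this patch), a store that flushed its active segment only into the in-memory pipeline could advance the WAL's per-family accounting through the new call; the surrounding variables are placeholders:

  // Hypothetical caller of the new WAL API; 'wal', 'regionInfo', 'familyName' and
  // 'lowestUnflushedSeqId' are assumed to exist in the calling code.
  wal.updateStore(regionInfo.getEncodedNameAsBytes(), familyName, lowestUnflushedSeqId,
      true); // onlyIfGreater: only move the family's sequence id forward, never back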
@@ -172,6 +172,10 @@ class DisabledWALProvider implements WALProvider { } @Override + public void updateStore(byte[] encodedRegionName, byte[] familyName, + Long sequenceid, boolean onlyIfGreater) { return; } + + @Override public void sync() { if (!this.listeners.isEmpty()) { for (WALActionsListener listener : this.listeners) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java index ce34c98..9193eae 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java @@ -19,20 +19,13 @@ package org.apache.hadoop.hbase.wal; -import java.io.Closeable; -import java.io.IOException; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.hadoop.hbase.classification.InterfaceStability; -import org.apache.hadoop.hbase.classification.InterfaceAudience; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; - -// imports we use from yet-to-be-moved regionsever.wal +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.regionserver.wal.CompressionContext; import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; import org.apache.hadoop.hbase.regionserver.wal.HLogKey; @@ -40,7 +33,13 @@ import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost; import org.apache.hadoop.hbase.regionserver.wal.WALEdit; -import com.google.common.annotations.VisibleForTesting; +import java.io.Closeable; +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; + +// imports we use from yet-to-be-moved regionsever.wal /** * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides @@ -127,9 +126,21 @@ public interface WAL { throws IOException; /** - * Sync what we have in the WAL. - * @throws IOException + * updates the seuence number of a specific store. + * depending on the flag: replaces current seq number if the given seq id is bigger, + * or even if it is lower than existing one + * @param encodedRegionName + * @param familyName + * @param sequenceid + * @param onlyIfGreater */ + void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid, + boolean onlyIfGreater); + + /** + * Sync what we have in the WAL. + * @throws IOException + */ void sync() throws IOException; /** diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index d471ec9..db4ab8d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -2080,10 +2080,23 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { */ public HRegion createLocalHRegion(TableName tableName, byte[] startKey, byte[] stopKey, boolean isReadOnly, Durability durability, WAL wal, byte[]... 
families) throws IOException { + return createLocalHRegionWithInMemoryFlags(tableName,startKey, stopKey, isReadOnly, + durability, wal, null, families); + } + + public HRegion createLocalHRegionWithInMemoryFlags(TableName tableName, byte[] startKey, + byte[] stopKey, + boolean isReadOnly, Durability durability, WAL wal, boolean[] inMemory, byte[]... families) + throws IOException { HTableDescriptor htd = new HTableDescriptor(tableName); htd.setReadOnly(isReadOnly); + int i=0; for (byte[] family : families) { HColumnDescriptor hcd = new HColumnDescriptor(family); + if(inMemory != null && i < inMemory.length) { + hcd.setInMemory(inMemory[i]); + } + i++; // Set default to be three versions. hcd.setMaxVersions(Integer.MAX_VALUE); htd.addFamily(hcd); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestGlobalMemStoreSize.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestGlobalMemStoreSize.java index ab0e6b4..4cc9bb5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestGlobalMemStoreSize.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestGlobalMemStoreSize.java @@ -18,13 +18,6 @@ */ package org.apache.hadoop.hbase; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -42,6 +35,13 @@ import org.apache.hadoop.hbase.util.Threads; import org.junit.Test; import org.junit.experimental.categories.Category; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + /** * Test HBASE-3694 whether the GlobalMemStoreSize is the same as the summary * of all the online region's MemStoreSize @@ -141,7 +141,7 @@ public class TestGlobalMemStoreSize { } /** - * Flush and log stats on flush + * Flush (force) and log stats on flush * @param r * @param server * @throws IOException diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java index bb216b6..3af4537 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java @@ -17,13 +17,7 @@ */ package org.apache.hadoop.hbase; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.CountDownLatch; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -54,7 +48,14 @@ import org.apache.hadoop.hbase.wal.WAL; import org.junit.Test; import org.junit.experimental.categories.Category; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicLong; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Test for the case where a regionserver going down has enough cycles to do damage to regions @@ -207,6 +208,19 @@ public class TestIOFencing { } super.completeCompaction(compactedFiles); } + + @Override public void setForceFlushToDisk() { + } + + @Override 
public boolean isMemStoreInCompaction() { + return false; + } + + @Override public void flushInMemory(long flushOpSeqId) { + } + + @Override public void updateLowestUnflushedSequenceIdInWal() { + } } private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); @@ -352,4 +366,4 @@ public class TestIOFencing { TEST_UTIL.shutdownMiniCluster(); } } -} \ No newline at end of file +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java index f6ade32..b7b3af2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java @@ -19,22 +19,6 @@ package org.apache.hadoop.hbase.io; -import java.io.IOException; -import java.lang.management.ManagementFactory; -import java.lang.management.RuntimeMXBean; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Map; -import java.util.TreeMap; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.CopyOnWriteArraySet; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.KeyValue; @@ -42,9 +26,9 @@ import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; -import org.apache.hadoop.hbase.io.hfile.LruCachedBlock; import org.apache.hadoop.hbase.io.hfile.LruBlockCache; -import org.apache.hadoop.hbase.regionserver.CellSkipListSet; +import org.apache.hadoop.hbase.io.hfile.LruCachedBlock; +import org.apache.hadoop.hbase.regionserver.CellSet; import org.apache.hadoop.hbase.regionserver.DefaultMemStore; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HStore; @@ -56,6 +40,22 @@ import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CopyOnWriteArraySet; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantReadWriteLock; + import static org.junit.Assert.assertEquals; /** @@ -237,8 +237,8 @@ public class TestHeapSize { assertEquals(expected, actual); } - // CellSkipListSet - cl = CellSkipListSet.class; + // CellSet + cl = CellSet.class; expected = ClassSize.estimateBase(cl, false); actual = ClassSize.CELL_SKIPLIST_SET; if (expected != actual) { @@ -305,15 +305,16 @@ public class TestHeapSize { // DefaultMemStore Deep Overhead actual = DefaultMemStore.DEEP_OVERHEAD; expected = ClassSize.estimateBase(cl, false); - expected += ClassSize.estimateBase(AtomicLong.class, false); - 
expected += (2 * ClassSize.estimateBase(CellSkipListSet.class, false)); + expected += (2 * ClassSize.estimateBase(AtomicLong.class, false)); + expected += (2 * ClassSize.estimateBase(CellSet.class, false)); expected += (2 * ClassSize.estimateBase(ConcurrentSkipListMap.class, false)); expected += (2 * ClassSize.estimateBase(TimeRangeTracker.class, false)); if(expected != actual) { ClassSize.estimateBase(cl, true); ClassSize.estimateBase(AtomicLong.class, true); - ClassSize.estimateBase(CellSkipListSet.class, true); - ClassSize.estimateBase(CellSkipListSet.class, true); + ClassSize.estimateBase(AtomicLong.class, true); + ClassSize.estimateBase(CellSet.class, true); + ClassSize.estimateBase(CellSet.class, true); ClassSize.estimateBase(ConcurrentSkipListMap.class, true); ClassSize.estimateBase(ConcurrentSkipListMap.class, true); ClassSize.estimateBase(TimeRangeTracker.class, true); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellSkipListSet.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellSkipListSet.java index 684839d..f5dc1f6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellSkipListSet.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellSkipListSet.java @@ -34,8 +34,8 @@ import org.junit.experimental.categories.Category; @Category({RegionServerTests.class, SmallTests.class}) public class TestCellSkipListSet extends TestCase { - private final CellSkipListSet csls = - new CellSkipListSet(CellComparator.COMPARATOR); + private final CellSet csls = + new CellSet(CellComparator.COMPARATOR); protected void setUp() throws Exception { super.setUp(); @@ -163,4 +163,4 @@ public class TestCellSkipListSet extends TestCase { assertTrue(Bytes.equals(head.first().getValueArray(), head.first().getValueOffset(), head.first().getValueLength(), value2, 0, value2.length)); } -} \ No newline at end of file +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactedMemStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactedMemStore.java new file mode 100644 index 0000000..353151e --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactedMemStore.java @@ -0,0 +1,1394 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import com.google.common.base.Joiner; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import junit.framework.TestCase; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdge; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Threads; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +/** + * compacted memstore test case + */ +@Category(MediumTests.class) +public class TestCompactedMemStore extends TestCase { + private static final Log LOG = LogFactory.getLog(TestCompactedMemStore.class); + private static final int ROW_COUNT = 10; + private static final int QUALIFIER_COUNT = ROW_COUNT; + private static final byte[] FAMILY = Bytes.toBytes("column"); + private static MemStoreChunkPool chunkPool; + private CompactedMemStore cms; + private HRegion region; + private HStore store; + private MultiVersionConcurrencyControl mvcc; + private AtomicLong startSeqNum = new AtomicLong(0); + + ////////////////////////////////////////////////////////////////////////////// + // Helpers + ////////////////////////////////////////////////////////////////////////////// + private static byte[] makeQualifier(final int i1, final int i2) { + return Bytes.toBytes(Integer.toString(i1) + ";" + + Integer.toString(i2)); + } + + // private KeyValue getDeleteKV(byte [] row) { + // return new KeyValue(row, Bytes.toBytes("test_col"), null, + // HConstants.LATEST_TIMESTAMP, KeyValue.Type.Delete, null); + // } + // + // private KeyValue getKV(byte [] row, byte [] value) { + // return new KeyValue(row, Bytes.toBytes("test_col"), null, + // HConstants.LATEST_TIMESTAMP, value); + // } + private static void addRows(int count, final CompactedMemStore mem) { + long nanos = System.nanoTime(); + + for (int i = 0; i < count; i++) { + if (i % 1000 == 0) { + + System.out.println(i + " Took for 1k usec: " + (System.nanoTime() - nanos) / 1000); + nanos = System.nanoTime(); + } + long timestamp = System.currentTimeMillis(); + + for (int ii = 0; ii < QUALIFIER_COUNT; ii++) { + byte[] row = Bytes.toBytes(i); + byte[] qf = makeQualifier(i, ii); + mem.add(new KeyValue(row, FAMILY, qf, timestamp, qf)); + } + } + } + + static void doScan(AbstractMemStore ms, int iteration) throws IOException { + long nanos = System.nanoTime(); + KeyValueScanner s = ms.getScanners(0).get(0); + 
s.seek(KeyValueUtil.createFirstOnRow(new byte[] { })); + + System.out.println(iteration + " create/seek took: " + (System.nanoTime() - nanos) / 1000); + int cnt = 0; + while (s.next() != null) ++cnt; + + System.out.println(iteration + " took usec: " + (System.nanoTime() - nanos) / 1000 + " for: " + + cnt); + + } + + @Override + public void tearDown() throws Exception { + chunkPool.clearChunks(); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + this.mvcc = new MultiVersionConcurrencyControl(); + Configuration conf = new Configuration(); + conf.setBoolean(StoreSegmentFactory.USEMSLAB_KEY, true); + conf.setFloat(MemStoreChunkPool.CHUNK_POOL_MAXSIZE_KEY, 0.2f); + conf.setInt(HRegion.MEMSTORE_PERIODIC_FLUSH_INTERVAL, 1000); + HBaseTestingUtility hbaseUtility = HBaseTestingUtility.createLocalHTU(conf); + HColumnDescriptor hcd = new HColumnDescriptor(FAMILY); + this.region = hbaseUtility.createTestRegion("foobar", hcd); + this.store = new HStore(region, hcd, conf); + this.cms = new CompactedMemStore(HBaseConfiguration.create(), CellComparator.COMPARATOR, store); + chunkPool = MemStoreChunkPool.getPool(conf); + assertTrue(chunkPool != null); + } + + public void testPutSameKey() { + byte[] bytes = Bytes.toBytes(getName()); + KeyValue kv = new KeyValue(bytes, bytes, bytes, bytes); + this.cms.add(kv); + byte[] other = Bytes.toBytes("somethingelse"); + KeyValue samekey = new KeyValue(bytes, bytes, bytes, other); + this.cms.add(samekey); + Cell found = this.cms.getActive().first(); + assertEquals(1, this.cms.getActive().getCellsCount()); + assertTrue(Bytes.toString(found.getValueArray()), CellUtil.matchingValue(samekey, found)); + } + + /** + * Test memstore snapshot happening while scanning. + * + * @throws IOException + */ + public void testScanAcrossSnapshot() throws IOException { + int rowCount = addRows(this.cms); + List memstorescanners = this.cms.getScanners(0); + Scan scan = new Scan(); + List result = new ArrayList(); + ScanInfo scanInfo = + new ScanInfo(null, 0, 1, HConstants.LATEST_TIMESTAMP, KeepDeletedCells.FALSE, 0, + this.cms.getComparator()); + ScanType scanType = ScanType.USER_SCAN; + StoreScanner s = new StoreScanner(scan, scanInfo, scanType, null, memstorescanners); + int count = 0; + try { + while (s.next(result)) { + LOG.info(result); + count++; + // Row count is same as column count. + assertEquals(rowCount, result.size()); + result.clear(); + } + } finally { + s.close(); + } + assertEquals(rowCount, count); + for (KeyValueScanner scanner : memstorescanners) { + scanner.close(); + } + + memstorescanners = this.cms.getScanners(mvcc.getReadPoint()); + // Now assert can count same number even if a snapshot mid-scan. + s = new StoreScanner(scan, scanInfo, scanType, null, memstorescanners); + count = 0; + try { + while (s.next(result)) { + LOG.info(result); + // Assert the stuff is coming out in right order. + assertTrue(CellUtil.matchingRow(result.get(0), Bytes.toBytes(count))); + count++; + // Row count is same as column count. + assertEquals(rowCount, result.size()); + if (count == 2) { + // the test should be still correct although the compaction is starting in the background + // there should be nothing to compact + this.cms.snapshot(0); + LOG.info("Snapshotted"); + } + result.clear(); + } + } finally { + s.close(); + } + + // snapshot immediately starts compaction, but even with the compaction nothing + // should be compacted (unique keys) and the test should still be correct... 
+ assertEquals(rowCount, count); + for (KeyValueScanner scanner : memstorescanners) { + scanner.close(); + } + memstorescanners = this.cms.getScanners(mvcc.getReadPoint()); + // Assert that new values are seen in kvset as we scan. + long ts = System.currentTimeMillis(); + s = new StoreScanner(scan, scanInfo, scanType, null, memstorescanners); + count = 0; + int snapshotIndex = 5; + try { + while (s.next(result)) { + LOG.info(result); + // Assert the stuff is coming out in right order. + assertTrue(CellUtil.matchingRow(result.get(0), Bytes.toBytes(count))); + // Row count is same as column count. + assertEquals("count=" + count + ", result=" + result, rowCount, result.size()); + count++; + if (count == snapshotIndex) { + MemStoreSnapshot snapshot = this.cms.snapshot(0); + this.cms.clearSnapshot(snapshot.getId()); + // Added more rows into kvset. But the scanner wont see these rows. + addRows(this.cms, ts); + LOG.info("Snapshotted, cleared it and then added values (which wont be seen)"); + } + result.clear(); + } + } finally { + s.close(); + } + assertEquals(rowCount, count); + } + + /** + * A simple test which verifies the 3 possible states when scanning across snapshot. + * + * @throws IOException + * @throws CloneNotSupportedException + */ + public void testScanAcrossSnapshot2() throws IOException, CloneNotSupportedException { + // we are going to the scanning across snapshot with two kvs + // kv1 should always be returned before kv2 + final byte[] one = Bytes.toBytes(1); + final byte[] two = Bytes.toBytes(2); + final byte[] f = Bytes.toBytes("f"); + final byte[] q = Bytes.toBytes("q"); + final byte[] v = Bytes.toBytes(3); + + final KeyValue kv1 = new KeyValue(one, f, q, 10, v); + final KeyValue kv2 = new KeyValue(two, f, q, 10, v); + + // use case 1: both kvs in kvset + this.cms.add(kv1.clone()); + this.cms.add(kv2.clone()); + verifyScanAcrossSnapshot2(kv1, kv2); + + // use case 2: both kvs in snapshot + this.cms.snapshot(0); + verifyScanAcrossSnapshot2(kv1, kv2); + + // use case 3: first in snapshot second in kvset + this.cms = new CompactedMemStore(HBaseConfiguration.create(), + CellComparator.COMPARATOR, store); + this.cms.add(kv1.clone()); + // As compaction is starting in the background the repetition + // of the k1 might be removed BUT the scanners created earlier + // should look on the OLD MutableCellSetSegment, so this should be OK... 
+ this.cms.snapshot(0); + this.cms.add(kv2.clone()); + verifyScanAcrossSnapshot2(kv1,kv2); + } + + private void verifyScanAcrossSnapshot2(KeyValue kv1, KeyValue kv2) + throws IOException { + List memstorescanners = this.cms.getScanners(mvcc.getReadPoint()); + assertEquals(1, memstorescanners.size()); + final KeyValueScanner scanner = memstorescanners.get(0); + scanner.seek(KeyValueUtil.createFirstOnRow(HConstants.EMPTY_START_ROW)); + assertEquals(kv1, scanner.next()); + assertEquals(kv2, scanner.next()); + assertNull(scanner.next()); + } + + private void assertScannerResults(KeyValueScanner scanner, KeyValue[] expected) + throws IOException { + scanner.seek(KeyValueUtil.createFirstOnRow(new byte[] { })); + List returned = Lists.newArrayList(); + + while (true) { + Cell next = scanner.next(); + if (next == null) break; + returned.add(next); + } + + assertTrue( + "Got:\n" + Joiner.on("\n").join(returned) + + "\nExpected:\n" + Joiner.on("\n").join(expected), + Iterables.elementsEqual(Arrays.asList(expected), returned)); + assertNull(scanner.peek()); + } + + public void testMemstoreConcurrentControl() throws IOException { + final byte[] row = Bytes.toBytes(1); + final byte[] f = Bytes.toBytes("family"); + final byte[] q1 = Bytes.toBytes("q1"); + final byte[] q2 = Bytes.toBytes("q2"); + final byte[] v = Bytes.toBytes("value"); + +// mvcc.advanceTo(this.startSeqNum.incrementAndGet()); + MultiVersionConcurrencyControl.WriteEntry w = mvcc.begin(); +// mvcc.beginWithSeqNum(this.startSeqNum.incrementAndGet()); + + KeyValue kv1 = new KeyValue(row, f, q1, v); + kv1.setSequenceId(w.getWriteNumber()); + cms.add(kv1); + + KeyValueScanner s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { }); + + mvcc.completeAndWait(w); + + s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv1 }); + +// w = mvcc.beginWithSeqNum(this.startSeqNum.incrementAndGet()); +// mvcc.advanceTo(this.startSeqNum.incrementAndGet()); + w = mvcc.begin(); + KeyValue kv2 = new KeyValue(row, f, q2, v); + kv2.setSequenceId(w.getWriteNumber()); + cms.add(kv2); + + s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv1 }); + + mvcc.completeAndWait(w); + + s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv1, kv2 }); + } + + /** + * Regression test for HBASE-2616, HBASE-2670. + * When we insert a higher-memstoreTS version of a cell but with + * the same timestamp, we still need to provide consistent reads + * for the same scanner. 
+ */ + public void testMemstoreEditsVisibilityWithSameKey() throws IOException { + final byte[] row = Bytes.toBytes(1); + final byte[] f = Bytes.toBytes("family"); + final byte[] q1 = Bytes.toBytes("q1"); + final byte[] q2 = Bytes.toBytes("q2"); + final byte[] v1 = Bytes.toBytes("value1"); + final byte[] v2 = Bytes.toBytes("value2"); + + // INSERT 1: Write both columns val1 +// mvcc.advanceTo(this.startSeqNum.incrementAndGet()); + MultiVersionConcurrencyControl.WriteEntry w = mvcc.begin(); +// mvcc.beginWithSeqNum(this.startSeqNum.incrementAndGet()); + + KeyValue kv11 = new KeyValue(row, f, q1, v1); + kv11.setSequenceId(w.getWriteNumber()); + cms.add(kv11); + + KeyValue kv12 = new KeyValue(row, f, q2, v1); + kv12.setSequenceId(w.getWriteNumber()); + cms.add(kv12); + mvcc.completeAndWait(w); + + // BEFORE STARTING INSERT 2, SEE FIRST KVS + KeyValueScanner s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kv12 }); + + // START INSERT 2: Write both columns val2 +// w = mvcc.beginWithSeqNum(this.startSeqNum.incrementAndGet()); +// mvcc.advanceTo(this.startSeqNum.incrementAndGet()); + w = mvcc.begin(); + KeyValue kv21 = new KeyValue(row, f, q1, v2); + kv21.setSequenceId(w.getWriteNumber()); + cms.add(kv21); + + KeyValue kv22 = new KeyValue(row, f, q2, v2); + kv22.setSequenceId(w.getWriteNumber()); + cms.add(kv22); + + // BEFORE COMPLETING INSERT 2, SEE FIRST KVS + s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kv12 }); + + // COMPLETE INSERT 2 + mvcc.completeAndWait(w); + + // NOW SHOULD SEE NEW KVS IN ADDITION TO OLD KVS. + // See HBASE-1485 for discussion about what we should do with + // the duplicate-TS inserts + s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv21, kv11, kv22, kv12 }); + } + + /** + * When we insert a higher-memstoreTS deletion of a cell but with + * the same timestamp, we still need to provide consistent reads + * for the same scanner. 
+ */ + public void testMemstoreDeletesVisibilityWithSameKey() throws IOException { + final byte[] row = Bytes.toBytes(1); + final byte[] f = Bytes.toBytes("family"); + final byte[] q1 = Bytes.toBytes("q1"); + final byte[] q2 = Bytes.toBytes("q2"); + final byte[] v1 = Bytes.toBytes("value1"); + // INSERT 1: Write both columns val1 +// mvcc.advanceTo(this.startSeqNum.incrementAndGet()); + MultiVersionConcurrencyControl.WriteEntry w = mvcc.begin(); +// mvcc.beginWithSeqNum(this.startSeqNum.incrementAndGet()); + + KeyValue kv11 = new KeyValue(row, f, q1, v1); + kv11.setSequenceId(w.getWriteNumber()); + cms.add(kv11); + + KeyValue kv12 = new KeyValue(row, f, q2, v1); + kv12.setSequenceId(w.getWriteNumber()); + cms.add(kv12); + mvcc.completeAndWait(w); + + // BEFORE STARTING INSERT 2, SEE FIRST KVS + KeyValueScanner s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kv12 }); + + // START DELETE: Insert delete for one of the columns +// mvcc.advanceTo(this.startSeqNum.incrementAndGet()); + w = mvcc.begin(); +// w = mvcc.beginWithSeqNum(this.startSeqNum.incrementAndGet()); + KeyValue kvDel = new KeyValue(row, f, q2, kv11.getTimestamp(), + KeyValue.Type.DeleteColumn); + kvDel.setSequenceId(w.getWriteNumber()); + cms.add(kvDel); + + // BEFORE COMPLETING DELETE, SEE FIRST KVS + s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kv12 }); + + // COMPLETE DELETE + mvcc.completeAndWait(w); + + // NOW WE SHOULD SEE DELETE + s = this.cms.getScanners(mvcc.getReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kvDel, kv12 }); + } + + public void testReadOwnWritesUnderConcurrency() throws Throwable { + + int NUM_THREADS = 8; + + ReadOwnWritesTester threads[] = new ReadOwnWritesTester[NUM_THREADS]; + AtomicReference caught = new AtomicReference(); + + for (int i = 0; i < NUM_THREADS; i++) { + threads[i] = new ReadOwnWritesTester(i, cms, mvcc, caught, this.startSeqNum); + threads[i].start(); + } + + for (int i = 0; i < NUM_THREADS; i++) { + threads[i].join(); + } + + if (caught.get() != null) { + throw caught.get(); + } + } + + ///////////////////////////////-/-/-/-//////////////////////////////////////////// + // Get tests + //////////////////////////////-/-/-/-///////////////////////////////////////////// + + /** + * Test memstore snapshots + * + * @throws IOException + */ + public void testSnapshotting() throws IOException { + final int snapshotCount = 5; + // Add some rows, run a snapshot. Do it a few times. 
+ for (int i = 0; i < snapshotCount; i++) { + addRows(this.cms); + runSnapshot(this.cms, true); + assertEquals("History not being cleared", 0, this.cms.getSnapshot().getCellsCount()); + } + } + + public void testMultipleVersionsSimple() throws Exception { + byte[] row = Bytes.toBytes("testRow"); + byte[] family = Bytes.toBytes("testFamily"); + byte[] qf = Bytes.toBytes("testQualifier"); + long[] stamps = { 1, 2, 3 }; + byte[][] values = { Bytes.toBytes("value0"), Bytes.toBytes("value1"), + Bytes.toBytes("value2") }; + KeyValue key0 = new KeyValue(row, family, qf, stamps[0], values[0]); + KeyValue key1 = new KeyValue(row, family, qf, stamps[1], values[1]); + KeyValue key2 = new KeyValue(row, family, qf, stamps[2], values[2]); + + cms.add(key0); + cms.add(key1); + cms.add(key2); + + assertTrue("Expected memstore to hold 3 values, actually has " + + cms.getActive().getCellsCount(), cms.getActive().getCellsCount() == 3); + } + + /** + * Test getNextRow from memstore + * + * @throws InterruptedException + */ + public void testGetNextRow() throws Exception { + addRows(this.cms); + // Add more versions to make it a little more interesting. + Thread.sleep(1); + addRows(this.cms); + Cell closestToEmpty = this.cms.getNextRow(KeyValue.LOWESTKEY); + assertTrue(KeyValue.COMPARATOR.compareRows(closestToEmpty, + new KeyValue(Bytes.toBytes(0), System.currentTimeMillis())) == 0); + for (int i = 0; i < ROW_COUNT; i++) { + Cell nr = this.cms.getNextRow(new KeyValue(Bytes.toBytes(i), + System.currentTimeMillis())); + if (i + 1 == ROW_COUNT) { + assertEquals(nr, null); + } else { + assertTrue(KeyValue.COMPARATOR.compareRows(nr, + new KeyValue(Bytes.toBytes(i + 1), System.currentTimeMillis())) == 0); + } + } + //starting from each row, validate results should contain the starting row + for (int startRowId = 0; startRowId < ROW_COUNT; startRowId++) { + ScanInfo scanInfo = new ScanInfo(FAMILY, 0, 1, Integer.MAX_VALUE, KeepDeletedCells.FALSE, + 0, this.cms.getComparator()); + ScanType scanType = ScanType.USER_SCAN; + InternalScanner scanner = new StoreScanner(new Scan( + Bytes.toBytes(startRowId)), scanInfo, scanType, null, + cms.getScanners(0)); + List results = new ArrayList(); + for (int i = 0; scanner.next(results); i++) { + int rowId = startRowId + i; + Cell left = results.get(0); + byte[] row1 = Bytes.toBytes(rowId); + assertTrue("Row name", + KeyValue.COMPARATOR.compareRows(left.getRowArray(), left.getRowOffset(), + (int) left.getRowLength(), row1, 0, row1.length) == 0); + assertEquals("Count of columns", QUALIFIER_COUNT, results.size()); + List row = new ArrayList(); + for (Cell kv : results) { + row.add(kv); + } + isExpectedRowWithoutTimestamps(rowId, row); + // Clear out set. Otherwise row results accumulate. 
+ results.clear(); + } + } + } + + public void testGet_memstoreAndSnapShot() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] qf4 = Bytes.toBytes("testqualifier4"); + byte[] qf5 = Bytes.toBytes("testqualifier5"); + byte[] val = Bytes.toBytes("testval"); + + //Setting up memstore + cms.add(new KeyValue(row, fam, qf1, val)); + cms.add(new KeyValue(row, fam, qf2, val)); + cms.add(new KeyValue(row, fam, qf3, val)); + //Pushing to pipeline + cms.flushInMemory(0); + assertEquals(0, cms.getSnapshot().getCellsCount()); + //Creating a snapshot + cms.setForceFlushToDisk().snapshot(0); + assertEquals(3, cms.getSnapshot().getCellsCount()); + //Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf4, val)); + cms.add(new KeyValue(row, fam, qf5, val)); + assertEquals(2, cms.getActive().getCellsCount()); + } + + ////////////////////////////////////////////////////////////////////////////// + // Delete tests + ////////////////////////////////////////////////////////////////////////////// + public void testGetWithDelete() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier"); + byte[] val = Bytes.toBytes("testval"); + + long ts1 = System.nanoTime(); + KeyValue put1 = new KeyValue(row, fam, qf1, ts1, val); + long ts2 = ts1 + 1; + KeyValue put2 = new KeyValue(row, fam, qf1, ts2, val); + long ts3 = ts2 + 1; + KeyValue put3 = new KeyValue(row, fam, qf1, ts3, val); + cms.add(put1); + cms.add(put2); + cms.add(put3); + + assertEquals(3, cms.getActive().getCellsCount()); + + KeyValue del2 = new KeyValue(row, fam, qf1, ts2, KeyValue.Type.Delete, val); + cms.delete(del2); + + List expected = new ArrayList(); + expected.add(put3); + expected.add(del2); + expected.add(put2); + expected.add(put1); + + assertEquals(4, cms.getActive().getCellsCount()); + int i = 0; + for (Cell cell : cms.getActive().getCellSet()) { + assertEquals(expected.get(i++), cell); + } + } + + public void testGetWithDeleteColumn() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier"); + byte[] val = Bytes.toBytes("testval"); + + long ts1 = System.nanoTime(); + KeyValue put1 = new KeyValue(row, fam, qf1, ts1, val); + long ts2 = ts1 + 1; + KeyValue put2 = new KeyValue(row, fam, qf1, ts2, val); + long ts3 = ts2 + 1; + KeyValue put3 = new KeyValue(row, fam, qf1, ts3, val); + cms.add(put1); + cms.add(put2); + cms.add(put3); + + assertEquals(3, cms.getActive().getCellsCount()); + + KeyValue del2 = + new KeyValue(row, fam, qf1, ts2, KeyValue.Type.DeleteColumn, val); + cms.delete(del2); + + List expected = new ArrayList(); + expected.add(put3); + expected.add(del2); + expected.add(put2); + expected.add(put1); + + assertEquals(4, cms.getActive().getCellsCount()); + int i = 0; + for (Cell cell : cms.getActive().getCellSet()) { + assertEquals(expected.get(i++), cell); + } + } + + public void testGetWithDeleteFamily() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] val = 
Bytes.toBytes("testval"); + long ts = System.nanoTime(); + + KeyValue put1 = new KeyValue(row, fam, qf1, ts, val); + KeyValue put2 = new KeyValue(row, fam, qf2, ts, val); + KeyValue put3 = new KeyValue(row, fam, qf3, ts, val); + KeyValue put4 = new KeyValue(row, fam, qf3, ts + 1, val); + + cms.add(put1); + cms.add(put2); + cms.add(put3); + cms.add(put4); + + KeyValue del = + new KeyValue(row, fam, null, ts, KeyValue.Type.DeleteFamily, val); + cms.delete(del); + + List expected = new ArrayList(); + expected.add(del); + expected.add(put1); + expected.add(put2); + expected.add(put4); + expected.add(put3); + + assertEquals(5, cms.getActive().getCellsCount()); + int i = 0; + for (Cell cell : cms.getActive().getCellSet()) { + assertEquals(expected.get(i++), cell); + } + } + + public void testKeepDeleteInmemstore() { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf = Bytes.toBytes("testqualifier"); + byte[] val = Bytes.toBytes("testval"); + long ts = System.nanoTime(); + cms.add(new KeyValue(row, fam, qf, ts, val)); + KeyValue delete = new KeyValue(row, fam, qf, ts, KeyValue.Type.Delete, val); + cms.delete(delete); + assertEquals(2, cms.getActive().getCellsCount()); + assertEquals(delete, cms.getActive().first()); + } + + public void testRetainsDeleteVersion() throws IOException { + // add a put to memstore + cms.add(KeyValueTestUtil.create("row1", "fam", "a", 100, "dont-care")); + + // now process a specific delete: + KeyValue delete = KeyValueTestUtil.create( + "row1", "fam", "a", 100, KeyValue.Type.Delete, "dont-care"); + cms.delete(delete); + + assertEquals(2, cms.getActive().getCellsCount()); + assertEquals(delete, cms.getActive().first()); + } + + ////////////////////////////////////=================================================== + //Test for timestamps + //////////////////////////////////// + + public void testRetainsDeleteColumn() throws IOException { + // add a put to memstore + cms.add(KeyValueTestUtil.create("row1", "fam", "a", 100, "dont-care")); + + // now process a specific delete: + KeyValue delete = KeyValueTestUtil.create("row1", "fam", "a", 100, + KeyValue.Type.DeleteColumn, "dont-care"); + cms.delete(delete); + + assertEquals(2, cms.getActive().getCellsCount()); + assertEquals(delete, cms.getActive().first()); + } + + //////////////////////////////////// + //Test for upsert with MSLAB + //////////////////////////////////// + + public void testRetainsDeleteFamily() throws IOException { + // add a put to memstore + cms.add(KeyValueTestUtil.create("row1", "fam", "a", 100, "dont-care")); + + // now process a specific delete: + KeyValue delete = KeyValueTestUtil.create("row1", "fam", "a", 100, + KeyValue.Type.DeleteFamily, "dont-care"); + cms.delete(delete); + + assertEquals(2, cms.getActive().getCellsCount()); + assertEquals(delete, cms.getActive().first()); + } + + /** + * Test a pathological pattern that shows why we can't currently + * use the MSLAB for upsert workloads. This test inserts data + * in the following pattern: + * - row0001 through row1000 (fills up one 2M Chunk) + * - row0002 through row1001 (fills up another 2M chunk, leaves one reference + * to the first chunk + * - row0003 through row1002 (another chunk, another dangling reference) + * This causes OOME pretty quickly if we use MSLAB for upsert + * since each 2M chunk is held onto by a single reference. 
+ */ + public void testUpsertMSLAB() throws Exception { + + int ROW_SIZE = 2048; + byte[] qualifier = new byte[ROW_SIZE - 4]; + + MemoryMXBean bean = ManagementFactory.getMemoryMXBean(); + for (int i = 0; i < 3; i++) { + System.gc(); + } + long usageBefore = bean.getHeapMemoryUsage().getUsed(); + + long size = 0; + long ts = 0; + + for (int newValue = 0; newValue < 1000; newValue++) { + for (int row = newValue; row < newValue + 1000; row++) { + byte[] rowBytes = Bytes.toBytes(row); + size += cms.updateColumnValue(rowBytes, FAMILY, qualifier, newValue, ++ts); + } + } + System.out.println("Wrote " + ts + " vals"); + for (int i = 0; i < 3; i++) { + System.gc(); + } + long usageAfter = bean.getHeapMemoryUsage().getUsed(); + System.out.println("Memory used: " + (usageAfter - usageBefore) + + " (heapsize: " + cms.heapSize() + + " size: " + size + ")"); + } + + //////////////////////////////////// + // Test for periodic memstore flushes + // based on time of oldest edit + //////////////////////////////////// + + /** + * Add keyvalues with a fixed memstoreTs, and checks that memstore size is decreased + * as older keyvalues are deleted from the memstore. + * + * @throws Exception + */ + public void testUpsertMemstoreSize() throws Exception { + long oldSize = cms.size(); + + List l = new ArrayList(); + KeyValue kv1 = KeyValueTestUtil.create("r", "f", "q", 100, "v"); + KeyValue kv2 = KeyValueTestUtil.create("r", "f", "q", 101, "v"); + KeyValue kv3 = KeyValueTestUtil.create("r", "f", "q", 102, "v"); + + kv1.setSequenceId(1); + kv2.setSequenceId(1); + kv3.setSequenceId(1); + l.add(kv1); + l.add(kv2); + l.add(kv3); + + this.cms.upsert(l, 2);// readpoint is 2 + long newSize = this.cms.size(); + assert (newSize > oldSize); + //The kv1 should be removed. + assert (cms.getActive().getCellsCount() == 2); + + KeyValue kv4 = KeyValueTestUtil.create("r", "f", "q", 104, "v"); + kv4.setSequenceId(1); + l.clear(); + l.add(kv4); + this.cms.upsert(l, 3); + assertEquals(newSize, this.cms.size()); + //The kv2 should be removed. + assert (cms.getActive().getCellsCount() == 2); + //this.memstore = null; + } + + /** + * Tests that the timeOfOldestEdit is updated correctly for the + * various edit operations in memstore. + * + * @throws Exception + */ + public void testUpdateToTimeOfOldestEdit() throws Exception { + try { + EnvironmentEdgeForMemstoreTest edge = new EnvironmentEdgeForMemstoreTest(); + EnvironmentEdgeManager.injectEdge(edge); + long t = cms.timeOfOldestEdit(); + assertEquals(t, Long.MAX_VALUE); + + // test the case that the timeOfOldestEdit is updated after a KV add + cms.add(KeyValueTestUtil.create("r", "f", "q", 100, "v")); + t = cms.timeOfOldestEdit(); + assertTrue(t == 1234); + // snapshot() after setForceFlushToDisk() will reset timeOfOldestEdit. 
The method will also assert + // the value is reset to Long.MAX_VALUE + + // t = runSnapshot(compacmemstore, false); + t = runSnapshot(cms, true); + + // test the case that the timeOfOldestEdit is updated after a KV delete + cms.delete(KeyValueTestUtil.create("r", "f", "q", 100, "v")); + t = cms.timeOfOldestEdit(); + assertTrue(t == 1234); + + t = runSnapshot(cms, true); + + // test the case that the timeOfOldestEdit is updated after a KV upsert + List l = new ArrayList(); + KeyValue kv1 = KeyValueTestUtil.create("r", "f", "q", 100, "v"); + kv1.setSequenceId(100); + l.add(kv1); + cms.upsert(l, 1000); + t = cms.timeOfOldestEdit(); + assertTrue(t == 1234); + } finally { + EnvironmentEdgeManager.reset(); + } + } + + /** + * Tests the HRegion.shouldFlush method - adds an edit in the memstore + * and checks that shouldFlush returns true, and another where it disables + * the periodic flush functionality and tests whether shouldFlush returns + * false. + * + * @throws Exception + */ + public void testShouldFlush() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(HRegion.MEMSTORE_PERIODIC_FLUSH_INTERVAL, 1000); + checkShouldFlush(conf, true); + // test disable flush + conf.setInt(HRegion.MEMSTORE_PERIODIC_FLUSH_INTERVAL, 0); + checkShouldFlush(conf, false); + } + + private void checkShouldFlush(Configuration conf, boolean expected) throws Exception { + try { + EnvironmentEdgeForMemstoreTest edge = new EnvironmentEdgeForMemstoreTest(); + EnvironmentEdgeManager.injectEdge(edge); + HBaseTestingUtility hbaseUtility = HBaseTestingUtility.createLocalHTU(conf); + HRegion region = hbaseUtility.createTestRegion("foobar", new HColumnDescriptor("foo")); + + List stores = region.getStores(); + assertTrue(stores.size() == 1); + + Store s = stores.iterator().next(); + edge.setCurrentTimeMillis(1234); + s.add(KeyValueTestUtil.create("r", "f", "q", 100, "v")); + edge.setCurrentTimeMillis(1234 + 100); + StringBuffer sb = new StringBuffer(); + assertTrue(!region.shouldFlush(sb)); + edge.setCurrentTimeMillis(1234 + 10000); + assertTrue(region.shouldFlush(sb) == expected); + } finally { + EnvironmentEdgeManager.reset(); + } + } + + /** + * Adds {@link #ROW_COUNT} rows and {@link #QUALIFIER_COUNT} + * + * @param hmc Instance to add rows to. + * @return How many rows we added. + * @throws IOException + */ + private int addRows(final AbstractMemStore hmc) { + return addRows(hmc, HConstants.LATEST_TIMESTAMP); + } + + /** + * Adds {@link #ROW_COUNT} rows and {@link #QUALIFIER_COUNT} + * + * @param hmc Instance to add rows to. + * @return How many rows we added. + * @throws IOException + */ + private int addRows(final AbstractMemStore hmc, final long ts) { + for (int i = 0; i < ROW_COUNT; i++) { + long timestamp = ts == HConstants.LATEST_TIMESTAMP ? + System.currentTimeMillis() : ts; + for (int ii = 0; ii < QUALIFIER_COUNT; ii++) { + byte[] row = Bytes.toBytes(i); + byte[] qf = makeQualifier(i, ii); + hmc.add(new KeyValue(row, FAMILY, qf, timestamp, qf)); + } + } + return ROW_COUNT; + } + + private long runSnapshot(final CompactedMemStore hmc, boolean useForce) + throws IOException { + // Save off old state. + long oldHistorySize = hmc.getSnapshot().getSize(); + long prevTimeStamp = hmc.timeOfOldestEdit(); + if (useForce) hmc.setForceFlushToDisk(); + hmc.snapshot(0); + MemStoreSnapshot snapshot = hmc.snapshot(0); + if (useForce) { + // Make some assertions about what just happened. 
+ assertTrue("History size has not increased", oldHistorySize < snapshot.getSize()); + long t = hmc.timeOfOldestEdit(); + assertTrue("Time of oldest edit is not Long.MAX_VALUE", t == Long.MAX_VALUE); + hmc.clearSnapshot(snapshot.getId()); + } else { + long t = hmc.timeOfOldestEdit(); + assertTrue("Time of oldest edit didn't remain the same", t == prevTimeStamp); + } + return prevTimeStamp; + } + + private void isExpectedRowWithoutTimestamps(final int rowIndex, + List kvs) { + int i = 0; + for (Cell kv : kvs) { + byte[] expectedColname = makeQualifier(rowIndex, i++); + assertTrue("Column name", CellUtil.matchingQualifier(kv, expectedColname)); + // Value is column name as bytes. Usually result is + // 100 bytes in size at least. This is the default size + // for BytesWriteable. For comparison, convert bytes to + // String and trim to remove trailing null bytes. + assertTrue("Content", CellUtil.matchingValue(kv, expectedColname)); + } + } + + @Test + public void testPuttingBackChunksAfterFlushing() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] qf4 = Bytes.toBytes("testqualifier4"); + byte[] qf5 = Bytes.toBytes("testqualifier5"); + byte[] val = Bytes.toBytes("testval"); + + // Setting up memstore + cms.add(new KeyValue(row, fam, qf1, val)); + cms.add(new KeyValue(row, fam, qf2, val)); + cms.add(new KeyValue(row, fam, qf3, val)); + + // Creating a snapshot + cms.setForceFlushToDisk(); + MemStoreSnapshot snapshot = cms.snapshot(0); + assertEquals(3, cms.getSnapshot().getCellsCount()); + + // Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf4, val)); + cms.add(new KeyValue(row, fam, qf5, val)); + assertEquals(2, cms.getActive().getCellsCount()); + cms.clearSnapshot(snapshot.getId()); + + int chunkCount = chunkPool.getPoolSize(); + assertTrue(chunkCount > 0); + + } + + @Test + public void testPuttingBackChunksWithOpeningScanner() + throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] qf4 = Bytes.toBytes("testqualifier4"); + byte[] qf5 = Bytes.toBytes("testqualifier5"); + byte[] qf6 = Bytes.toBytes("testqualifier6"); + byte[] qf7 = Bytes.toBytes("testqualifier7"); + byte[] val = Bytes.toBytes("testval"); + + // Setting up memstore + cms.add(new KeyValue(row, fam, qf1, val)); + cms.add(new KeyValue(row, fam, qf2, val)); + cms.add(new KeyValue(row, fam, qf3, val)); + + // Creating a snapshot + cms.setForceFlushToDisk(); + MemStoreSnapshot snapshot = cms.snapshot(0); + assertEquals(3, cms.getSnapshot().getCellsCount()); + + // Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf4, val)); + cms.add(new KeyValue(row, fam, qf5, val)); + assertEquals(2, cms.getActive().getCellsCount()); + + // opening scanner before clear the snapshot + List scanners = cms.getScanners(0); + // Shouldn't putting back the chunks to pool,since some scanners are opening + // based on their data + cms.clearSnapshot(snapshot.getId()); + + assertTrue(chunkPool.getPoolSize() == 0); + + // Chunks will be put back to pool after close scanners; + for (KeyValueScanner scanner : scanners) { 
+ scanner.close(); + } + assertTrue(chunkPool.getPoolSize() > 0); + + // clear chunks + chunkPool.clearChunks(); + + // Creating another snapshot + cms.setForceFlushToDisk(); + snapshot = cms.snapshot(0); + // Adding more value + cms.add(new KeyValue(row, fam, qf6, val)); + cms.add(new KeyValue(row, fam, qf7, val)); + // opening scanners + scanners = cms.getScanners(0); + // close scanners before clear the snapshot + for (KeyValueScanner scanner : scanners) { + scanner.close(); + } + // Since no opening scanner, the chunks of snapshot should be put back to + // pool + cms.clearSnapshot(snapshot.getId()); + assertTrue(chunkPool.getPoolSize() > 0); + } + + @Test + public void testPuttingBackChunksWithOpeningPipelineScanner() + throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] val = Bytes.toBytes("testval"); + + // Setting up memstore + cms.add(new KeyValue(row, fam, qf1, 1, val)); + cms.add(new KeyValue(row, fam, qf2, 1, val)); + cms.add(new KeyValue(row, fam, qf3, 1, val)); + + // Creating a pipeline + cms.disableCompaction(); + cms.flushInMemory(0); + + // Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf1, 2, val)); + cms.add(new KeyValue(row, fam, qf2, 2, val)); + assertEquals(2, cms.getActive().getCellsCount()); + + // pipeline bucket 2 + cms.flushInMemory(0); + // opening scanner before force flushing + List scanners = cms.getScanners(0); + // Shouldn't putting back the chunks to pool,since some scanners are opening + // based on their data + cms.enableCompaction(); + // trigger compaction + cms.flushInMemory(0); + + // Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf3, 3, val)); + cms.add(new KeyValue(row, fam, qf2, 3, val)); + cms.add(new KeyValue(row, fam, qf1, 3, val)); + assertEquals(3, cms.getActive().getCellsCount()); + + while (cms.isMemStoreInCompaction()) { + Threads.sleep(10); + } + + assertTrue(chunkPool.getPoolSize() == 0); + + // Chunks will be put back to pool after close scanners; + for (KeyValueScanner scanner : scanners) { + scanner.close(); + } + assertTrue(chunkPool.getPoolSize() > 0); + + // clear chunks + chunkPool.clearChunks(); + + // Creating another snapshot + cms.setForceFlushToDisk(); + MemStoreSnapshot snapshot = cms.snapshot(0); + cms.clearSnapshot(snapshot.getId()); + cms.setForceFlushToDisk(); + snapshot = cms.snapshot(0); + // Adding more value + cms.add(new KeyValue(row, fam, qf2, 4, val)); + cms.add(new KeyValue(row, fam, qf3, 4, val)); + // opening scanners + scanners = cms.getScanners(0); + // close scanners before clear the snapshot + for (KeyValueScanner scanner : scanners) { + scanner.close(); + } + // Since no opening scanner, the chunks of snapshot should be put back to + // pool + cms.clearSnapshot(snapshot.getId()); + assertTrue(chunkPool.getPoolSize() > 0); + } + + ////////////////////////////////////////////////////////////////////////////// + // Compaction tests + ////////////////////////////////////////////////////////////////////////////// + public void testCompaction1Bucket() throws IOException { + + String[] keys1 = { "A", "A", "B", "C" }; //A1, A2, B3, C4 + + // test 1 bucket + addRowsByKeys(cms, keys1); + assertEquals(704, region.getMemstoreTotalSize()); + + long size = 
cms.getFlushableSize(); + cms.flushInMemory(0); // push keys to pipeline and compact +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemStoreInCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(528, region.getMemstoreTotalSize()); + + cms.setForceFlushToDisk(); + size = cms.getFlushableSize(); + MemStoreSnapshot snapshot = cms.snapshot(0); // push keys to snapshot +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher + ImmutableSegment s = cms.getSnapshot(); + assertEquals(3, s.getCellsCount()); + assertEquals(0, region.getMemstoreTotalSize()); + + cms.clearSnapshot(snapshot.getId()); + } + + public void testCompaction2Buckets() throws IOException { + + String[] keys1 = { "A", "A", "B", "C" }; + String[] keys2 = { "A", "B", "D" }; + + addRowsByKeys(cms, keys1); + assertEquals(704, region.getMemstoreTotalSize()); + + long size = cms.getFlushableSize(); + cms.flushInMemory(0); // push keys to pipeline and compact +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemStoreInCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(528, region.getMemstoreTotalSize()); + + addRowsByKeys(cms, keys2); + assertEquals(1056, region.getMemstoreTotalSize()); + + size = cms.getFlushableSize(); + cms.flushInMemory(0); // push keys to pipeline and compact +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemStoreInCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(704, region.getMemstoreTotalSize()); + + cms.setForceFlushToDisk(); + size = cms.getFlushableSize(); + MemStoreSnapshot snapshot = cms.snapshot(0); // push keys to snapshot +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher + ImmutableSegment s = cms.getSnapshot(); + assertEquals(4, s.getCellsCount()); + assertEquals(0, region.getMemstoreTotalSize()); + + cms.clearSnapshot(snapshot.getId()); + } + + public void testCompaction3Buckets() throws IOException { + + String[] keys1 = { "A", "A", "B", "C" }; + String[] keys2 = { "A", "B", "D" }; + String[] keys3 = { "D", "B", "B" }; + + addRowsByKeys(cms, keys1); + assertEquals(704, region.getMemstoreSize()); + + long size = cms.getFlushableSize(); + cms.flushInMemory(0); // push keys to pipeline and compact +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemStoreInCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(0, region.getMemstoreSize()); + assertEquals(528, region.getMemstoreTotalSize()); + + addRowsByKeys(cms, keys2); + assertEquals(528, region.getMemstoreSize()); + assertEquals(1056, region.getMemstoreTotalSize()); + + cms.disableCompaction(); + size = cms.getFlushableSize(); + cms.flushInMemory(0); // push keys to pipeline without compaction +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(0, region.getMemstoreSize()); + assertEquals(1056, region.getMemstoreTotalSize()); + + addRowsByKeys(cms, keys3); + assertEquals(528, region.getMemstoreSize()); + assertEquals(1584, region.getMemstoreTotalSize()); + + cms.enableCompaction(); + size = cms.getFlushableSize(); + cms.flushInMemory(0); // push keys to pipeline and compact +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread 
+ while (cms.isMemStoreInCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(0, region.getMemstoreSize()); + assertEquals(704, region.getMemstoreTotalSize()); + + cms.setForceFlushToDisk(); + size = cms.getFlushableSize(); + MemStoreSnapshot snapshot = cms.snapshot(0); // push keys to snapshot +// region.addAndGetGlobalMemstoreSize(-size); // simulate flusher + ImmutableSegment s = cms.getSnapshot(); + assertEquals(4, s.getCellsCount()); + assertEquals(0, region.getMemstoreSize()); + assertEquals(0, region.getMemstoreTotalSize()); + + cms.clearSnapshot(snapshot.getId()); + } + + private void addRowsByKeys(final AbstractMemStore hmc, String[] keys) { + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf = Bytes.toBytes("testqualifier"); + for (int i = 0; i < keys.length; i++) { + long timestamp = System.currentTimeMillis(); + Threads.sleep(1); // to make sure each kv gets a different ts + byte[] row = Bytes.toBytes(keys[i]); + byte[] val = Bytes.toBytes(keys[i] + i); + KeyValue kv = new KeyValue(row, fam, qf, timestamp, val); + hmc.add(kv); + LOG.debug("added kv: " + kv.getKeyString() + ", timestamp" + kv.getTimestamp()); + long size = AbstractMemStore.heapSizeChange(kv, true); + region.addAndGetGlobalMemstoreSize(size); + } + } + + private static class ReadOwnWritesTester extends Thread { + static final int NUM_TRIES = 1000; + + final byte[] row; + + final byte[] f = Bytes.toBytes("family"); + final byte[] q1 = Bytes.toBytes("q1"); + + final MultiVersionConcurrencyControl mvcc; + final CompactedMemStore compmemstore; + final AtomicLong startSeqNum; + + AtomicReference caughtException; + + public ReadOwnWritesTester(int id, + CompactedMemStore memstore, + MultiVersionConcurrencyControl mvcc, + AtomicReference caughtException, + AtomicLong startSeqNum) { + this.mvcc = mvcc; + this.compmemstore = memstore; + this.caughtException = caughtException; + row = Bytes.toBytes(id); + this.startSeqNum = startSeqNum; + } + + public void run() { + try { + internalRun(); + } catch (Throwable t) { + caughtException.compareAndSet(null, t); + } + } + + private void internalRun() throws IOException { + for (long i = 0; i < NUM_TRIES && caughtException.get() == null; i++) { + MultiVersionConcurrencyControl.WriteEntry w = + mvcc.begin(); + + // Insert the sequence value (i) + byte[] v = Bytes.toBytes(i); + + KeyValue kv = new KeyValue(row, f, q1, i, v); + kv.setSequenceId(w.getWriteNumber()); + compmemstore.add(kv); + mvcc.completeAndWait(w); + + // Assert that we can read back + KeyValueScanner s = this.compmemstore.getScanners(mvcc.getReadPoint()).get(0); + s.seek(kv); + + Cell ret = s.next(); + assertNotNull("Didnt find own write at all", ret); + assertEquals("Didnt read own writes", + kv.getTimestamp(), ret.getTimestamp()); + } + } + } + + private class EnvironmentEdgeForMemstoreTest implements EnvironmentEdge { + long t = 1234; + + @Override + public long currentTime() { + return t; + } + public void setCurrentTimeMillis(long t) { + this.t = t; + } + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java index 066bbb3..ccd26e1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java @@ -18,17 +18,10 @@ */ package org.apache.hadoop.hbase.regionserver; -import 
java.io.IOException; -import java.lang.management.ManagementFactory; -import java.lang.management.MemoryMXBean; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; - +import com.google.common.base.Joiner; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import junit.framework.TestCase; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -57,9 +50,14 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.wal.WALFactory; import org.junit.experimental.categories.Category; -import com.google.common.base.Joiner; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; /** memstore test case */ @Category({RegionServerTests.class, MediumTests.class}) @@ -86,11 +84,9 @@ public class TestDefaultMemStore extends TestCase { byte [] other = Bytes.toBytes("somethingelse"); KeyValue samekey = new KeyValue(bytes, bytes, bytes, other); this.memstore.add(samekey); - Cell found = this.memstore.cellSet.first(); - assertEquals(1, this.memstore.cellSet.size()); - assertTrue( - Bytes.toString(found.getValueArray(), found.getValueOffset(), found.getValueLength()), - CellUtil.matchingValue(samekey, found)); + Cell found = this.memstore.getActive().first(); + assertEquals(1, this.memstore.getActive().getCellsCount()); + assertTrue(Bytes.toString(found.getValueArray()), CellUtil.matchingValue(samekey, found)); } /** @@ -104,7 +100,7 @@ public class TestDefaultMemStore extends TestCase { List result = new ArrayList(); ScanInfo scanInfo = new ScanInfo(null, 0, 1, HConstants.LATEST_TIMESTAMP, KeepDeletedCells.FALSE, 0, - this.memstore.comparator); + this.memstore.getComparator()); ScanType scanType = ScanType.USER_SCAN; StoreScanner s = new StoreScanner(scan, scanInfo, scanType, null, memstorescanners); int count = 0; @@ -137,7 +133,7 @@ public class TestDefaultMemStore extends TestCase { // Row count is same as column count. assertEquals(rowCount, result.size()); if (count == 2) { - this.memstore.snapshot(); + this.memstore.snapshot(0); LOG.info("Snapshotted"); } result.clear(); @@ -164,7 +160,7 @@ public class TestDefaultMemStore extends TestCase { assertEquals("count=" + count + ", result=" + result, rowCount, result.size()); count++; if (count == snapshotIndex) { - MemStoreSnapshot snapshot = this.memstore.snapshot(); + MemStoreSnapshot snapshot = this.memstore.snapshot(0); this.memstore.clearSnapshot(snapshot.getId()); // Added more rows into kvset. But the scanner wont see these rows. 
addRows(this.memstore, ts); @@ -201,13 +197,13 @@ public class TestDefaultMemStore extends TestCase { verifyScanAcrossSnapshot2(kv1, kv2); // use case 2: both kvs in snapshot - this.memstore.snapshot(); + this.memstore.snapshot(0); verifyScanAcrossSnapshot2(kv1, kv2); // use case 3: first in snapshot second in kvset this.memstore = new DefaultMemStore(); this.memstore.add(kv1.clone()); - this.memstore.snapshot(); + this.memstore.snapshot(0); this.memstore.add(kv2.clone()); verifyScanAcrossSnapshot2(kv1, kv2); } @@ -472,7 +468,7 @@ public class TestDefaultMemStore extends TestCase { for (int i = 0; i < snapshotCount; i++) { addRows(this.memstore); runSnapshot(this.memstore); - assertEquals("History not being cleared", 0, this.memstore.snapshot.size()); + assertEquals("History not being cleared", 0, this.memstore.getSnapshot().getCellsCount()); } } @@ -493,7 +489,7 @@ public class TestDefaultMemStore extends TestCase { m.add(key2); assertTrue("Expected memstore to hold 3 values, actually has " + - m.cellSet.size(), m.cellSet.size() == 3); + m.getActive().getCellsCount(), m.getActive().getCellsCount() == 3); } ////////////////////////////////////////////////////////////////////////////// @@ -524,7 +520,7 @@ public class TestDefaultMemStore extends TestCase { //starting from each row, validate results should contain the starting row for (int startRowId = 0; startRowId < ROW_COUNT; startRowId++) { ScanInfo scanInfo = new ScanInfo(FAMILY, 0, 1, Integer.MAX_VALUE, KeepDeletedCells.FALSE, - 0, this.memstore.comparator); + 0, this.memstore.getComparator()); ScanType scanType = ScanType.USER_SCAN; InternalScanner scanner = new StoreScanner(new Scan( Bytes.toBytes(startRowId)), scanInfo, scanType, null, @@ -564,13 +560,13 @@ public class TestDefaultMemStore extends TestCase { memstore.add(new KeyValue(row, fam ,qf2, val)); memstore.add(new KeyValue(row, fam ,qf3, val)); //Creating a snapshot - memstore.snapshot(); - assertEquals(3, memstore.snapshot.size()); + memstore.snapshot(0); + assertEquals(3, memstore.getSnapshot().getCellsCount()); //Adding value to "new" memstore - assertEquals(0, memstore.cellSet.size()); + assertEquals(0, memstore.getActive().getCellsCount()); memstore.add(new KeyValue(row, fam ,qf4, val)); memstore.add(new KeyValue(row, fam ,qf5, val)); - assertEquals(2, memstore.cellSet.size()); + assertEquals(2, memstore.getActive().getCellsCount()); } ////////////////////////////////////////////////////////////////////////////// @@ -592,7 +588,7 @@ public class TestDefaultMemStore extends TestCase { memstore.add(put2); memstore.add(put3); - assertEquals(3, memstore.cellSet.size()); + assertEquals(3, memstore.getActive().getCellsCount()); KeyValue del2 = new KeyValue(row, fam, qf1, ts2, KeyValue.Type.Delete, val); memstore.delete(del2); @@ -603,9 +599,9 @@ public class TestDefaultMemStore extends TestCase { expected.add(put2); expected.add(put1); - assertEquals(4, memstore.cellSet.size()); + assertEquals(4, memstore.getActive().getCellsCount()); int i = 0; - for(Cell cell : memstore.cellSet) { + for(Cell cell : memstore.getActive().getCellSet()) { assertEquals(expected.get(i++), cell); } } @@ -626,7 +622,7 @@ public class TestDefaultMemStore extends TestCase { memstore.add(put2); memstore.add(put3); - assertEquals(3, memstore.cellSet.size()); + assertEquals(3, memstore.getActive().getCellsCount()); KeyValue del2 = new KeyValue(row, fam, qf1, ts2, KeyValue.Type.DeleteColumn, val); @@ -639,9 +635,9 @@ public class TestDefaultMemStore extends TestCase { expected.add(put1); - 
assertEquals(4, memstore.cellSet.size()); + assertEquals(4, memstore.getActive().getCellsCount()); int i = 0; - for (Cell cell: memstore.cellSet) { + for (Cell cell: memstore.getActive().getCellSet()) { assertEquals(expected.get(i++), cell); } } @@ -679,9 +675,9 @@ public class TestDefaultMemStore extends TestCase { - assertEquals(5, memstore.cellSet.size()); + assertEquals(5, memstore.getActive().getCellsCount()); int i = 0; - for (Cell cell: memstore.cellSet) { + for (Cell cell: memstore.getActive().getCellSet()) { assertEquals(expected.get(i++), cell); } } @@ -695,8 +691,8 @@ public class TestDefaultMemStore extends TestCase { memstore.add(new KeyValue(row, fam, qf, ts, val)); KeyValue delete = new KeyValue(row, fam, qf, ts, KeyValue.Type.Delete, val); memstore.delete(delete); - assertEquals(2, memstore.cellSet.size()); - assertEquals(delete, memstore.cellSet.first()); + assertEquals(2, memstore.getActive().getCellsCount()); + assertEquals(delete, memstore.getActive().first()); } public void testRetainsDeleteVersion() throws IOException { @@ -708,8 +704,8 @@ public class TestDefaultMemStore extends TestCase { "row1", "fam", "a", 100, KeyValue.Type.Delete, "dont-care"); memstore.delete(delete); - assertEquals(2, memstore.cellSet.size()); - assertEquals(delete, memstore.cellSet.first()); + assertEquals(2, memstore.getActive().getCellsCount()); + assertEquals(delete, memstore.getActive().first()); } public void testRetainsDeleteColumn() throws IOException { // add a put to memstore @@ -720,8 +716,8 @@ public class TestDefaultMemStore extends TestCase { KeyValue.Type.DeleteColumn, "dont-care"); memstore.delete(delete); - assertEquals(2, memstore.cellSet.size()); - assertEquals(delete, memstore.cellSet.first()); + assertEquals(2, memstore.getActive().getCellsCount()); + assertEquals(delete, memstore.getActive().first()); } public void testRetainsDeleteFamily() throws IOException { // add a put to memstore @@ -732,40 +728,8 @@ public class TestDefaultMemStore extends TestCase { KeyValue.Type.DeleteFamily, "dont-care"); memstore.delete(delete); - assertEquals(2, memstore.cellSet.size()); - assertEquals(delete, memstore.cellSet.first()); - } - - //////////////////////////////////// - //Test for timestamps - //////////////////////////////////// - - /** - * Test to ensure correctness when using Memstore with multiple timestamps - */ - public void testMultipleTimestamps() throws IOException { - long[] timestamps = new long[] {20,10,5,1}; - Scan scan = new Scan(); - - for (long timestamp: timestamps) - addRows(memstore,timestamp); - - scan.setTimeRange(0, 2); - assertTrue(memstore.shouldSeek(scan, Long.MIN_VALUE)); - - scan.setTimeRange(20, 82); - assertTrue(memstore.shouldSeek(scan, Long.MIN_VALUE)); - - scan.setTimeRange(10, 20); - assertTrue(memstore.shouldSeek(scan, Long.MIN_VALUE)); - - scan.setTimeRange(8, 12); - assertTrue(memstore.shouldSeek(scan, Long.MIN_VALUE)); - - /*This test is not required for correctness but it should pass when - * timestamp range optimization is on*/ - //scan.setTimeRange(28, 42); - //assertTrue(!memstore.shouldSeek(scan)); + assertEquals(2, memstore.getActive().getCellsCount()); + assertEquals(delete, memstore.getActive().first()); } //////////////////////////////////// @@ -787,7 +751,7 @@ public class TestDefaultMemStore extends TestCase { */ public void testUpsertMSLAB() throws Exception { Configuration conf = HBaseConfiguration.create(); - conf.setBoolean(DefaultMemStore.USEMSLAB_KEY, true); + conf.setBoolean(StoreSegmentFactory.USEMSLAB_KEY, true); memstore = 
new DefaultMemStore(conf, CellComparator.COMPARATOR); int ROW_SIZE = 2048; @@ -830,7 +794,7 @@ public class TestDefaultMemStore extends TestCase { public void testUpsertMemstoreSize() throws Exception { Configuration conf = HBaseConfiguration.create(); memstore = new DefaultMemStore(conf, CellComparator.COMPARATOR); - long oldSize = memstore.size.get(); + long oldSize = memstore.size(); List l = new ArrayList(); KeyValue kv1 = KeyValueTestUtil.create("r", "f", "q", 100, "v"); @@ -841,18 +805,18 @@ public class TestDefaultMemStore extends TestCase { l.add(kv1); l.add(kv2); l.add(kv3); this.memstore.upsert(l, 2);// readpoint is 2 - long newSize = this.memstore.size.get(); + long newSize = this.memstore.size(); assert(newSize > oldSize); //The kv1 should be removed. - assert(memstore.cellSet.size() == 2); - + assert(memstore.getActive().getCellsCount() == 2); + KeyValue kv4 = KeyValueTestUtil.create("r", "f", "q", 104, "v"); kv4.setSequenceId(1); l.clear(); l.add(kv4); this.memstore.upsert(l, 3); - assertEquals(newSize, this.memstore.size.get()); + assertEquals(newSize, this.memstore.size()); //The kv2 should be removed. - assert(memstore.cellSet.size() == 2); + assert(memstore.getActive().getCellsCount() == 2); //this.memstore = null; } @@ -1013,10 +977,10 @@ public class TestDefaultMemStore extends TestCase { private long runSnapshot(final DefaultMemStore hmc) throws UnexpectedStateException { // Save off old state. - int oldHistorySize = hmc.snapshot.size(); - MemStoreSnapshot snapshot = hmc.snapshot(); + int oldHistorySize = hmc.getSnapshot().getCellsCount(); + MemStoreSnapshot snapshot = hmc.snapshot(0); // Make some assertions about what just happened. - assertTrue("History size has not increased", oldHistorySize < hmc.snapshot.size()); + assertTrue("History size has not increased", oldHistorySize < hmc.getSnapshot().getCellsCount()); long t = memstore.timeOfOldestEdit(); assertTrue("Time of oldest edit is not Long.MAX_VALUE", t == Long.MAX_VALUE); hmc.clearSnapshot(snapshot.getId()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFlushRegionEntry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFlushRegionEntry.java index abd8c59..29b3790 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFlushRegionEntry.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFlushRegionEntry.java @@ -46,11 +46,11 @@ public class TestFlushRegionEntry { HRegion r = mock(HRegion.class); doReturn(hri).when(r).getRegionInfo(); - FlushRegionEntry entry = new FlushRegionEntry(r, true); - FlushRegionEntry other = new FlushRegionEntry(r, true); + FlushRegionEntry entry = new FlushRegionEntry(r, true, false); + FlushRegionEntry other = new FlushRegionEntry(r, true, false); assertEquals(entry.hashCode(), other.hashCode()); assertEquals(entry, other); } -} \ No newline at end of file +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java index 2cb3b38..649f11d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java @@ -18,25 +18,10 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.security.Key; -import java.security.SecureRandom; -import java.util.ArrayList; -import java.util.Collection; -import 
java.util.Collections; -import java.util.Date; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableSet; -import java.util.concurrent.ConcurrentSkipListSet; - -import javax.crypto.spec.SecretKeySpec; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.HarFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; @@ -58,9 +43,7 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.mob.MobConstants; import org.apache.hadoop.hbase.mob.MobUtils; import org.apache.hadoop.hbase.monitoring.MonitoredTask; -import org.apache.hadoop.hbase.regionserver.StoreFile.Reader; import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController; import org.apache.hadoop.hbase.security.EncryptionUtil; import org.apache.hadoop.hbase.security.User; @@ -76,6 +59,19 @@ import org.junit.experimental.categories.Category; import org.junit.rules.TestName; import org.mockito.Mockito; +import javax.crypto.spec.SecretKeySpec; +import java.io.IOException; +import java.security.Key; +import java.security.SecureRandom; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableSet; +import java.util.concurrent.ConcurrentSkipListSet; + @Category(MediumTests.class) public class TestHMobStore { public static final Log LOG = LogFactory.getLog(TestHMobStore.class); @@ -469,7 +465,7 @@ public class TestHMobStore { this.store.snapshot(); flushStore(store, id++); Assert.assertEquals(storeFilesSize, this.store.getStorefiles().size()); - Assert.assertEquals(0, ((DefaultMemStore)this.store.memstore).cellSet.size()); + Assert.assertEquals(0, ((DefaultMemStore)this.store.memstore).getActive().getCellsCount()); } /** @@ -480,7 +476,7 @@ public class TestHMobStore { */ private static void flushStore(HMobStore store, long id) throws IOException { StoreFlushContext storeFlushCtx = store.createFlushContext(id); - storeFlushCtx.prepare(); + storeFlushCtx.prepareFlushToDisk(id); storeFlushCtx.flushCache(Mockito.mock(MonitoredTask.class)); storeFlushCtx.commit(Mockito.mock(MonitoredTask.class)); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index cb95d6f..883cba0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -18,49 +18,10 @@ */ package org.apache.hadoop.hbase.regionserver; - -import static org.apache.hadoop.hbase.HBaseTestingUtility.COLUMNS; -import static org.apache.hadoop.hbase.HBaseTestingUtility.FIRST_CHAR; -import static org.apache.hadoop.hbase.HBaseTestingUtility.LAST_CHAR; -import static org.apache.hadoop.hbase.HBaseTestingUtility.START_KEY; -import static org.apache.hadoop.hbase.HBaseTestingUtility.fam1; -import static org.apache.hadoop.hbase.HBaseTestingUtility.fam2; -import static org.apache.hadoop.hbase.HBaseTestingUtility.fam3; -import static org.junit.Assert.assertArrayEquals; -import static 
org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyBoolean; -import static org.mockito.Matchers.anyLong; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.io.InterruptedIOException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.NavigableMap; -import java.util.TreeMap; -import java.util.UUID; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; - +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.protobuf.ByteString; import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -157,21 +118,37 @@ import org.apache.hadoop.hbase.wal.WALKey; import org.apache.hadoop.hbase.wal.WALProvider; import org.apache.hadoop.hbase.wal.WALProvider.Writer; import org.apache.hadoop.hbase.wal.WALSplitter; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; +import org.junit.*; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; import org.mockito.ArgumentCaptor; import org.mockito.ArgumentMatcher; import org.mockito.Mockito; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.protobuf.ByteString; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeMap; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.apache.hadoop.hbase.HBaseTestingUtility.*; +import static org.junit.Assert.*; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.*; /** * Basic stand-alone testing of HRegion. No clusters! 
@@ -284,7 +261,7 @@ public class TestHRegion { // First put something in current memstore, which will be in snapshot after flusher.prepare() region.put(put); StoreFlushContext storeFlushCtx = store.createFlushContext(12345); - storeFlushCtx.prepare(); + storeFlushCtx.prepareFlushToDisk(12345); // Second put something in current memstore put.add(COLUMN_FAMILY_BYTES, Bytes.toBytes("abc"), value); region.put(put); @@ -294,8 +271,6 @@ public class TestHRegion { HBaseTestingUtility.closeRegionAndWAL(region); } - - /* * This test is for verifying memstore snapshot size is correctly updated in case of rollback * See HBASE-10845 @@ -315,7 +290,7 @@ public class TestHRegion { @Override public void sync(long txid) throws IOException { - storeFlushCtx.prepare(); + storeFlushCtx.prepareFlushToDisk(0); super.sync(txid); } } @@ -324,7 +299,7 @@ public class TestHRegion { Path rootDir = new Path(dir + "testMemstoreSnapshotSize"); MyFaultyFSLog faultyLog = new MyFaultyFSLog(fs, rootDir, "testMemstoreSnapshotSize", CONF); HRegion region = initHRegion(tableName, null, null, name.getMethodName(), - CONF, false, Durability.SYNC_WAL, faultyLog, COLUMN_FAMILY_BYTES); + CONF, false, Durability.SYNC_WAL, faultyLog, COLUMN_FAMILY_BYTES); Store store = region.getStore(COLUMN_FAMILY_BYTES); // Get some random bytes. @@ -520,7 +495,7 @@ public class TestHRegion { // Manufacture an outstanding snapshot -- fake a failed flush by doing prepare step only. Store store = region.getStore(COLUMN_FAMILY_BYTES); StoreFlushContext storeFlushCtx = store.createFlushContext(12345); - storeFlushCtx.prepare(); + storeFlushCtx.prepareFlushToDisk(12345); // Now add two entries to the foreground memstore. Put p2 = new Put(row); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); @@ -2437,10 +2412,10 @@ public class TestHRegion { // This is kinda hacky, but better than nothing... long now = System.currentTimeMillis(); DefaultMemStore memstore = (DefaultMemStore) ((HStore) region.getStore(fam1)).memstore; - Cell firstCell = memstore.cellSet.first(); + Cell firstCell = ((HStore) region.getStore(fam1)).memstore.getActive().first(); assertTrue(firstCell.getTimestamp() <= now); now = firstCell.getTimestamp(); - for (Cell cell : memstore.cellSet) { + for (Cell cell : memstore.getActive().getCellSet()) { assertTrue(cell.getTimestamp() <= now); now = cell.getTimestamp(); } @@ -6007,7 +5982,7 @@ public class TestHRegion { public void testOpenRegionWrittenToWALForLogReplay() throws Exception { // similar to the above test but with distributed log replay final ServerName serverName = ServerName.valueOf("testOpenRegionWrittenToWALForLogReplay", - 100, 42); + 100, 42); final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName)); HTableDescriptor htd diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionWithInMemoryFlush.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionWithInMemoryFlush.java new file mode 100644 index 0000000..caee700 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionWithInMemoryFlush.java @@ -0,0 +1,6236 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.protobuf.ByteString; +import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.client.*; +import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException; +import org.apache.hadoop.hbase.filter.*; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor; +import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor; +import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.FlushAction; +import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor; +import org.apache.hadoop.hbase.protobuf.generated.WALProtos.StoreDescriptor; +import org.apache.hadoop.hbase.regionserver.handler.FinishRegionRecoveringHandler; +import org.apache.hadoop.hbase.regionserver.wal.FSHLog; +import org.apache.hadoop.hbase.regionserver.wal.HLogKey; +import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL; +import org.apache.hadoop.hbase.regionserver.wal.MetricsWALSource; +import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; +import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.hadoop.hbase.regionserver.wal.WALUtil; +import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.hbase.test.MetricsAssertHelper; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.IncrementingEnvironmentEdge; +import org.apache.hadoop.hbase.util.PairOfSameType; +import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.wal.DefaultWALProvider; +import org.apache.hadoop.hbase.wal.FaultyFSLog; +import org.apache.hadoop.hbase.wal.WAL; +import org.apache.hadoop.hbase.wal.WALFactory; +import org.apache.hadoop.hbase.wal.WALKey; +import org.apache.hadoop.hbase.wal.WALProvider; +import org.apache.hadoop.hbase.wal.WALProvider.Writer; +import 
org.apache.hadoop.hbase.wal.WALSplitter; +import org.junit.*; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.mockito.ArgumentCaptor; +import org.mockito.ArgumentMatcher; +import org.mockito.Mockito; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.security.PrivilegedExceptionAction; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.apache.hadoop.hbase.HBaseTestingUtility.*; +import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.*; + +/** + * A test similar to TestHRegion, but with in-memory flush families. + * Also checks wal truncation after in-memory compaction. + */ +@Category({VerySlowRegionServerTests.class, LargeTests.class}) +@SuppressWarnings("deprecation") +public class TestHRegionWithInMemoryFlush { + // Do not spin up clusters in here. If you need to spin up a cluster, do it + // over in TestHRegionOnCluster. + private static final Log LOG = LogFactory.getLog(TestHRegion.class); + @Rule public TestName name = new TestName(); + + private static final String COLUMN_FAMILY = "MyCF"; + private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY); + + HRegion region = null; + // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack) + private static HBaseTestingUtility TEST_UTIL; + public static Configuration CONF ; + private String dir; + private static FileSystem FILESYSTEM; + private final int MAX_VERSIONS = 2; + + // Test names + protected TableName tableName; + protected String method; + protected final byte[] qual1 = Bytes.toBytes("qual1"); + protected final byte[] qual2 = Bytes.toBytes("qual2"); + protected final byte[] qual3 = Bytes.toBytes("qual3"); + protected final byte[] value1 = Bytes.toBytes("value1"); + protected final byte[] value2 = Bytes.toBytes("value2"); + protected final byte[] row = Bytes.toBytes("rowA"); + protected final byte[] row2 = Bytes.toBytes("rowB"); + + protected final MetricsAssertHelper metricsAssertHelper = CompatibilitySingletonFactory + .getInstance(MetricsAssertHelper.class); + + @Before + public void setup() throws IOException { + TEST_UTIL = HBaseTestingUtility.createLocalHTU(); + FILESYSTEM = TEST_UTIL.getTestFileSystem(); + CONF = TEST_UTIL.getConfiguration(); + dir = TEST_UTIL.getDataTestDir("TestHRegion").toString(); + method = name.getMethodName(); + tableName = TableName.valueOf(name.getMethodName()); + } + + @After + public void tearDown() throws Exception { + EnvironmentEdgeManagerTestHelper.reset(); + LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir()); + TEST_UTIL.cleanupTestDir(); + } + + String getName() { + return name.getMethodName(); + } + + /** + * Test that I can use the max flushed sequence id after the close. + * @throws IOException + */ + @Test(timeout = 100000) + public void testSequenceId() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, COLUMN_FAMILY_BYTES); + assertEquals(HConstants.NO_SEQNUM, region.getMaxFlushedSeqId()); + // Weird. This returns 0 if no store files or no edits. Afraid to change it. 
+ assertEquals(0, (long)region.getMaxStoreSeqId().get(COLUMN_FAMILY_BYTES)); + region.close(); + assertEquals(HConstants.NO_SEQNUM, region.getMaxFlushedSeqId()); + assertEquals(0, (long)region.getMaxStoreSeqId().get(COLUMN_FAMILY_BYTES)); + // Open region again. + region = initHRegion(tableName, name.getMethodName(), CONF, COLUMN_FAMILY_BYTES); + byte [] value = Bytes.toBytes(name.getMethodName()); + // Make a random put against our cf. + Put put = new Put(value); + put.addColumn(COLUMN_FAMILY_BYTES, null, value); + region.put(put); + // No flush yet so init numbers should still be in place. + assertEquals(HConstants.NO_SEQNUM, region.getMaxFlushedSeqId()); + assertEquals(0, (long)region.getMaxStoreSeqId().get(COLUMN_FAMILY_BYTES)); + region.flush(true); + long max = region.getMaxFlushedSeqId(); + region.close(); + assertEquals(max, region.getMaxFlushedSeqId()); + } + + /** + * Test for Bug 2 of HBASE-10466. + * "Bug 2: Conditions for the first flush of region close (so-called pre-flush) If memstoreSize + * is smaller than a certain value, or when region close starts a flush is ongoing, the first + * flush is skipped and only the second flush takes place. However, two flushes are required in + * case previous flush fails and leaves some data in snapshot. The bug could cause loss of data + * in current memstore. The fix is removing all conditions except abort check so we ensure 2 + * flushes for region close." + * @throws IOException + */ + @Test (timeout=60000) + public void testCloseCarryingSnapshot() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, COLUMN_FAMILY_BYTES); + Store store = region.getStore(COLUMN_FAMILY_BYTES); + // Get some random bytes. + byte [] value = Bytes.toBytes(name.getMethodName()); + // Make a random put against our cf. + Put put = new Put(value); + put.add(COLUMN_FAMILY_BYTES, null, value); + // First put something in current memstore, which will be in snapshot after flusher.prepare() + region.put(put); + assertEquals(208, region.getMemstoreSize()); + store.setForceFlushToDisk(); + StoreFlushContext storeFlushCtx = store.createFlushContext(12345); + storeFlushCtx.prepareFlushToDisk(12345); + assertEquals(208, region.getMemstoreSize()); + // Second put something in current memstore + put.add(COLUMN_FAMILY_BYTES, Bytes.toBytes("abc"), value); + region.put(put); + assertEquals(624, region.getMemstoreSize()); + // Close with something in memstore and something in the snapshot. Make sure all is cleared. 
+ region.close(); + assertEquals(0, region.getMemstoreSize()); + HBaseTestingUtility.closeRegionAndWAL(region); + } + + /* + * This test is for verifying memstore snapshot size is correctly updated in case of rollback + * See HBASE-10845 + */ + @Test (timeout=60000) + public void testMemstoreSnapshotSize() throws IOException { + class MyFaultyFSLog extends FaultyFSLog { + StoreFlushContext storeFlushCtx; + public MyFaultyFSLog(FileSystem fs, Path rootDir, String logName, Configuration conf) + throws IOException { + super(fs, rootDir, logName, conf); + } + + void setStoreFlushCtx(StoreFlushContext storeFlushCtx) { + this.storeFlushCtx = storeFlushCtx; + } + + @Override + public void sync(long txid) throws IOException { + storeFlushCtx.prepareFlushToDisk(0); + super.sync(txid); + } + } + + FileSystem fs = FileSystem.get(CONF); + Path rootDir = new Path(dir + "testMemstoreSnapshotSize"); + MyFaultyFSLog faultyLog = new MyFaultyFSLog(fs, rootDir, "testMemstoreSnapshotSize", CONF); + HRegion region = initHRegion(tableName, null, null, name.getMethodName(), + CONF, false, Durability.SYNC_WAL, faultyLog, COLUMN_FAMILY_BYTES); + + Store store = region.getStore(COLUMN_FAMILY_BYTES); + // Get some random bytes. + byte [] value = Bytes.toBytes(name.getMethodName()); + faultyLog.setStoreFlushCtx(store.createFlushContext(12345)); + + Put put = new Put(value); + put.add(COLUMN_FAMILY_BYTES, Bytes.toBytes("abc"), value); + faultyLog.setFailureType(FaultyFSLog.FailureType.SYNC); + + boolean threwIOE = false; + try { + region.put(put); + } catch (IOException ioe) { + threwIOE = true; + } finally { + assertTrue("The regionserver should have thrown an exception", threwIOE); + } + long sz = store.getFlushableSize(); + assertTrue("flushable size should be zero, but it is " + sz, sz == 0); + HBaseTestingUtility.closeRegionAndWAL(region); + } + + /** + * Create a WAL outside of the usual helper in + * {@link HBaseTestingUtility#createWal(Configuration, Path, org.apache.hadoop.hbase.HRegionInfo)} because that method + * doesn't play nicely with FaultyFileSystem. Call this method before overriding + * {@code fs.file.impl}. + * @param callingMethod a unique component for the path, probably the name of the test method. + */ + private static WAL createWALCompatibleWithFaultyFileSystem(String callingMethod, + Configuration conf, TableName tableName) throws IOException { + final Path logDir = TEST_UTIL.getDataTestDirOnTestFS(callingMethod + ".log"); + final Configuration walConf = new Configuration(conf); + FSUtils.setRootDir(walConf, logDir); + return (new WALFactory(walConf, + Collections.singletonList(new MetricsWAL()), callingMethod)) + .getWAL(tableName.toBytes(), tableName.getNamespace()); + } + + /** + * Test we do not lose data if we fail a flush and then close. + * Part of HBase-10466. Tests the following from the issue description: + * "Bug 1: Wrong calculation of HRegion.memstoreSize: When a flush fails, data to be flushed is + * kept in each MemStore's snapshot and wait for next flush attempt to continue on it. But when + * the next flush succeeds, the counter of total memstore size in HRegion is always deduced by + * the sum of current memstore sizes instead of snapshots left from previous failed flush. This + * calculation is problematic that almost every time there is failed flush, HRegion.memstoreSize + * gets reduced by a wrong value. If region flush could not proceed for a couple cycles, the size + * in current memstore could be much larger than the snapshot. 
It's likely to drift memstoreSize + much smaller than expected. In extreme case, if the error accumulates to even bigger than + * HRegion's memstore size limit, any further flush is skipped because flush does not do anything + * if memstoreSize is not larger than 0." + * @throws Exception + */ + @Test (timeout=60000) + public void testFlushSizeAccounting() throws Exception { + final Configuration conf = HBaseConfiguration.create(CONF); + final String callingMethod = name.getMethodName(); + final WAL wal = createWALCompatibleWithFaultyFileSystem(callingMethod, conf, tableName); + // Only retry once. + conf.setInt("hbase.hstore.flush.retries.number", 1); + final User user = + User.createUserForTesting(conf, this.name.getMethodName(), new String[]{"foo"}); + // Inject our faulty LocalFileSystem + conf.setClass("fs.file.impl", TestStore.FaultyFileSystem.class, FileSystem.class); + user.runAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + // Make sure it worked (above is sensitive to caching details in hadoop core) + FileSystem fs = FileSystem.get(conf); + Assert.assertEquals(TestStore.FaultyFileSystem.class, fs.getClass()); + TestStore.FaultyFileSystem ffs = (TestStore.FaultyFileSystem)fs; + HRegion region = null; + try { + // Initialize region + region = initHRegion(tableName, null, null, callingMethod, conf, false, + Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES); + long size = region.getMemstoreSize(); + Assert.assertEquals(0, size); + // Put one item into memstore. Measure the size of one item in memstore. + Put p1 = new Put(row); + p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[])null)); + region.put(p1); + final long sizeOfOnePut = region.getMemstoreSize(); + // Fail a flush which means the current memstore will hang out as memstore 'snapshot'. + try { + LOG.info("Flushing"); + region.flush(true,true); //force flush to disk instead of in-memory flush + Assert.fail("Didn't bubble up IOE!"); + } catch (DroppedSnapshotException dse) { + // What we are expecting + region.closing.set(false); // this is needed for the rest of the test to work + } + // Make it so all writes succeed from here on out + ffs.fault.set(false); + // Check sizes. + Assert.assertEquals(sizeOfOnePut, region.getMemstoreTotalSize()); + Assert.assertEquals(sizeOfOnePut, region.getMemstoreSize()); + // Now add two entries so that on this next flush that fails, we can see if we + // subtract the right amount, the snapshot size only. + Put p2 = new Put(row); + p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); + p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null)); + region.put(p2); + Assert.assertEquals(sizeOfOnePut * 3, region.getMemstoreTotalSize()); + Assert.assertEquals(sizeOfOnePut * 3, region.getMemstoreSize()); + // Do a successful flush. It will clear the snapshot only. That's how flushes work. + // If already a snapshot, we clear it else we move the memstore to be snapshot and flush + // it + region.flush(true, true); + // Make sure our memory accounting is right. 
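+ // The successful flush dropped only the snapshot (the single put left over from the failed flush),
+ // so the two cells added via p2 are what remain: 2 * sizeOfOnePut.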
+ Assert.assertEquals(sizeOfOnePut * 2, region.getMemstoreTotalSize()); + Assert.assertEquals(sizeOfOnePut * 2, region.getMemstoreSize()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(region); + } + return null; + } + }); + FileSystem.closeAllForUGI(user.getUGI()); + } + + @Test (timeout=60000) + public void testCloseWithFailingFlush() throws Exception { + final Configuration conf = HBaseConfiguration.create(CONF); + final String callingMethod = name.getMethodName(); + final WAL wal = createWALCompatibleWithFaultyFileSystem(callingMethod, conf, tableName); + // Only retry once. + conf.setInt("hbase.hstore.flush.retries.number", 1); + final User user = + User.createUserForTesting(conf, this.name.getMethodName(), new String[]{"foo"}); + // Inject our faulty LocalFileSystem + conf.setClass("fs.file.impl", TestStore.FaultyFileSystem.class, FileSystem.class); + user.runAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + // Make sure it worked (above is sensitive to caching details in hadoop core) + FileSystem fs = FileSystem.get(conf); + Assert.assertEquals(TestStore.FaultyFileSystem.class, fs.getClass()); + TestStore.FaultyFileSystem ffs = (TestStore.FaultyFileSystem)fs; + HRegion region = null; + try { + // Initialize region + region = initHRegion(tableName, null, null, callingMethod, conf, false, + Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES); + long size = region.getMemstoreSize(); + Assert.assertEquals(0, size); + // Put one item into memstore. Measure the size of one item in memstore. + Put p1 = new Put(row); + p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[])null)); + region.put(p1); + // Manufacture an outstanding snapshot -- fake a failed flush by doing prepare step only. + Store store = region.getStore(COLUMN_FAMILY_BYTES); + StoreFlushContext storeFlushCtx = store.createFlushContext(12345); + storeFlushCtx.prepareFlushToDisk(12345); + // Now add two entries to the foreground memstore. + Put p2 = new Put(row); + p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); + p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null)); + region.put(p2); + // Now try close on top of a failing flush. 
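+ // With the faulty filesystem still failing writes, the flush performed by close() is expected
+ // to throw DroppedSnapshotException, which is caught below.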
+ region.close(); + fail(); + } catch (DroppedSnapshotException dse) { + // Expected + LOG.info("Expected DroppedSnapshotException"); + } finally { + // Make it so all writes succeed from here on out so can close clean + ffs.fault.set(false); + HBaseTestingUtility.closeRegionAndWAL(region); + } + return null; + } + }); + FileSystem.closeAllForUGI(user.getUGI()); + } + + @Test + public void testCompactionAffectedByScanners() throws Exception { + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, method, CONF, family); + + Put put = new Put(Bytes.toBytes("r1")); + put.add(family, Bytes.toBytes("q1"), Bytes.toBytes("v1")); + region.put(put); + region.flush(true); + + Scan scan = new Scan(); + scan.setMaxVersions(3); + // open the first scanner + RegionScanner scanner1 = region.getScanner(scan); + + Delete delete = new Delete(Bytes.toBytes("r1")); + region.delete(delete); + region.flush(true); + + // open the second scanner + RegionScanner scanner2 = region.getScanner(scan); + + List results = new ArrayList(); + + System.out.println("Smallest read point:" + region.getSmallestReadPoint()); + + // make a major compaction + region.compact(true); + + // open the third scanner + RegionScanner scanner3 = region.getScanner(scan); + + // get data from scanner 1, 2, 3 after major compaction + scanner1.next(results); + System.out.println(results); + assertEquals(1, results.size()); + + results.clear(); + scanner2.next(results); + System.out.println(results); + assertEquals(0, results.size()); + + results.clear(); + scanner3.next(results); + System.out.println(results); + assertEquals(0, results.size()); + } + + @Test + public void testToShowNPEOnRegionScannerReseek() throws Exception { + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, method, CONF, family); + + Put put = new Put(Bytes.toBytes("r1")); + put.add(family, Bytes.toBytes("q1"), Bytes.toBytes("v1")); + region.put(put); + put = new Put(Bytes.toBytes("r2")); + put.add(family, Bytes.toBytes("q1"), Bytes.toBytes("v1")); + region.put(put); + region.flush(true); + + Scan scan = new Scan(); + scan.setMaxVersions(3); + // open the first scanner + RegionScanner scanner1 = region.getScanner(scan); + + System.out.println("Smallest read point:" + region.getSmallestReadPoint()); + + region.compact(true); + + scanner1.reseek(Bytes.toBytes("r2")); + List results = new ArrayList(); + scanner1.next(results); + Cell keyValue = results.get(0); + Assert.assertTrue(Bytes.compareTo(CellUtil.cloneRow(keyValue), Bytes.toBytes("r2")) == 0); + scanner1.close(); + } + + @Test + public void testSkipRecoveredEditsReplay() throws Exception { + String method = "testSkipRecoveredEditsReplay"; + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, method, CONF, family); + final WALFactory wals = new WALFactory(CONF, null, method); + try { + Path regiondir = region.getRegionFileSystem().getRegionDir(); + FileSystem fs = region.getRegionFileSystem().getFileSystem(); + byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes(); + + Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir); + + long maxSeqId = 1050; + long minSeqId = 1000; + + for (long i = minSeqId; i <= maxSeqId; i += 10) { + Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", i)); + fs.create(recoveredEdits); + WALProvider.Writer writer = wals.createRecoveredEditsWriter(fs, recoveredEdits); + + long time = System.nanoTime(); 
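+ // Each iteration writes a single edit, keyed by its sequence id, into its own recovered-edits file.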
+ WALEdit edit = new WALEdit(); + edit.add(new KeyValue(row, family, Bytes.toBytes(i), time, KeyValue.Type.Put, Bytes + .toBytes(i))); + writer.append(new WAL.Entry(new HLogKey(regionName, tableName, i, time, + HConstants.DEFAULT_CLUSTER_ID), edit)); + + writer.close(); + } + MonitoredTask status = TaskMonitor.get().createStatus(method); + Map maxSeqIdInStores = new TreeMap(Bytes.BYTES_COMPARATOR); + for (Store store : region.getStores()) { + maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), minSeqId - 1); + } + long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, status); + assertEquals(maxSeqId, seqId); + region.getMVCC().advanceTo(seqId); + Get get = new Get(row); + Result result = region.get(get); + for (long i = minSeqId; i <= maxSeqId; i += 10) { + List kvs = result.getColumnCells(family, Bytes.toBytes(i)); + assertEquals(1, kvs.size()); + assertArrayEquals(Bytes.toBytes(i), CellUtil.cloneValue(kvs.get(0))); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + wals.close(); + } + } + + @Test + public void testSkipRecoveredEditsReplaySomeIgnored() throws Exception { + String method = "testSkipRecoveredEditsReplaySomeIgnored"; + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, method, CONF, family); + final WALFactory wals = new WALFactory(CONF, null, method); + try { + Path regiondir = region.getRegionFileSystem().getRegionDir(); + FileSystem fs = region.getRegionFileSystem().getFileSystem(); + byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes(); + + Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir); + + long maxSeqId = 1050; + long minSeqId = 1000; + + for (long i = minSeqId; i <= maxSeqId; i += 10) { + Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", i)); + fs.create(recoveredEdits); + WALProvider.Writer writer = wals.createRecoveredEditsWriter(fs, recoveredEdits); + + long time = System.nanoTime(); + WALEdit edit = new WALEdit(); + edit.add(new KeyValue(row, family, Bytes.toBytes(i), time, KeyValue.Type.Put, Bytes + .toBytes(i))); + writer.append(new WAL.Entry(new HLogKey(regionName, tableName, i, time, + HConstants.DEFAULT_CLUSTER_ID), edit)); + + writer.close(); + } + long recoverSeqId = 1030; + MonitoredTask status = TaskMonitor.get().createStatus(method); + Map maxSeqIdInStores = new TreeMap(Bytes.BYTES_COMPARATOR); + for (Store store : region.getStores()) { + maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), recoverSeqId - 1); + } + long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, status); + assertEquals(maxSeqId, seqId); + region.getMVCC().advanceTo(seqId); + Get get = new Get(row); + Result result = region.get(get); + for (long i = minSeqId; i <= maxSeqId; i += 10) { + List kvs = result.getColumnCells(family, Bytes.toBytes(i)); + if (i < recoverSeqId) { + assertEquals(0, kvs.size()); + } else { + assertEquals(1, kvs.size()); + assertArrayEquals(Bytes.toBytes(i), CellUtil.cloneValue(kvs.get(0))); + } + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + wals.close(); + } + } + + @Test + public void testSkipRecoveredEditsReplayAllIgnored() throws Exception { + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, method, CONF, family); + try { + Path regiondir = region.getRegionFileSystem().getRegionDir(); + FileSystem fs = 
region.getRegionFileSystem().getFileSystem(); + + Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir); + for (int i = 1000; i < 1050; i += 10) { + Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", i)); + FSDataOutputStream dos = fs.create(recoveredEdits); + dos.writeInt(i); + dos.close(); + } + long minSeqId = 2000; + Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", minSeqId - 1)); + FSDataOutputStream dos = fs.create(recoveredEdits); + dos.close(); + + Map maxSeqIdInStores = new TreeMap(Bytes.BYTES_COMPARATOR); + for (Store store : region.getStores()) { + maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), minSeqId); + } + long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, null); + assertEquals(minSeqId, seqId); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testSkipRecoveredEditsReplayTheLastFileIgnored() throws Exception { + String method = "testSkipRecoveredEditsReplayTheLastFileIgnored"; + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, method, CONF, family); + final WALFactory wals = new WALFactory(CONF, null, method); + try { + Path regiondir = region.getRegionFileSystem().getRegionDir(); + FileSystem fs = region.getRegionFileSystem().getFileSystem(); + byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes(); + byte[][] columns = region.getTableDesc().getFamiliesKeys().toArray(new byte[0][]); + + assertEquals(0, region.getStoreFileList(columns).size()); + + Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir); + + long maxSeqId = 1050; + long minSeqId = 1000; + + for (long i = minSeqId; i <= maxSeqId; i += 10) { + Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", i)); + fs.create(recoveredEdits); + WALProvider.Writer writer = wals.createRecoveredEditsWriter(fs, recoveredEdits); + + long time = System.nanoTime(); + WALEdit edit = null; + if (i == maxSeqId) { + edit = WALEdit.createCompaction(region.getRegionInfo(), + CompactionDescriptor.newBuilder() + .setTableName(ByteString.copyFrom(tableName.getName())) + .setFamilyName(ByteString.copyFrom(regionName)) + .setEncodedRegionName(ByteString.copyFrom(regionName)) + .setStoreHomeDirBytes(ByteString.copyFrom(Bytes.toBytes(regiondir.toString()))) + .setRegionName(ByteString.copyFrom(region.getRegionInfo().getRegionName())) + .build()); + } else { + edit = new WALEdit(); + edit.add(new KeyValue(row, family, Bytes.toBytes(i), time, KeyValue.Type.Put, Bytes + .toBytes(i))); + } + writer.append(new WAL.Entry(new HLogKey(regionName, tableName, i, time, + HConstants.DEFAULT_CLUSTER_ID), edit)); + writer.close(); + } + + long recoverSeqId = 1030; + Map maxSeqIdInStores = new TreeMap(Bytes.BYTES_COMPARATOR); + MonitoredTask status = TaskMonitor.get().createStatus(method); + for (Store store : region.getStores()) { + maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), recoverSeqId - 1); + } + long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, status); + assertEquals(maxSeqId, seqId); + + // assert that the files are flushed + assertEquals(1, region.getStoreFileList(columns).size()); + + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + wals.close(); + } + } + + @Test + public void testRecoveredEditsReplayCompaction() throws Exception { + String 
method = name.getMethodName(); + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, method, CONF, family); + final WALFactory wals = new WALFactory(CONF, null, method); + try { + Path regiondir = region.getRegionFileSystem().getRegionDir(); + FileSystem fs = region.getRegionFileSystem().getFileSystem(); + byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes(); + + long maxSeqId = 3; + long minSeqId = 0; + + for (long i = minSeqId; i < maxSeqId; i++) { + Put put = new Put(Bytes.toBytes(i)); + put.add(family, Bytes.toBytes(i), Bytes.toBytes(i)); + region.put(put); + region.flush(true,true); + assertEquals(i+1, region.getStore(family).getStorefilesCount()); + } + + // this will create a region with 3 files + assertEquals(3, region.getStore(family).getStorefilesCount()); + List storeFiles = new ArrayList(3); + for (StoreFile sf : region.getStore(family).getStorefiles()) { + storeFiles.add(sf.getPath()); + } + + // disable compaction completion + CONF.setBoolean("hbase.hstore.compaction.complete", false); + region.compactStores(); + + // ensure that nothing changed + assertEquals(3, region.getStore(family).getStorefilesCount()); + + // now find the compacted file, and manually add it to the recovered edits + Path tmpDir = region.getRegionFileSystem().getTempDir(); + FileStatus[] files = FSUtils.listStatus(fs, tmpDir); + String errorMsg = "Expected to find 1 file in the region temp directory " + + "from the compaction, could not find any"; + assertNotNull(errorMsg, files); + assertEquals(errorMsg, 1, files.length); + // move the file inside region dir + Path newFile = region.getRegionFileSystem().commitStoreFile(Bytes.toString(family), + files[0].getPath()); + + CompactionDescriptor + compactionDescriptor = ProtobufUtil.toCompactionDescriptor(this.region + .getRegionInfo(), family, storeFiles, Lists.newArrayList(newFile), region + .getRegionFileSystem().getStoreDir(Bytes.toString(family))); + + WALUtil.writeCompactionMarker(region.getWAL(), this.region.getTableDesc(), + this.region.getRegionInfo(), compactionDescriptor, region.getMVCC()); + + Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir); + + Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", 1000)); + fs.create(recoveredEdits); + WALProvider.Writer writer = wals.createRecoveredEditsWriter(fs, recoveredEdits); + + long time = System.nanoTime(); + + writer.append(new WAL.Entry(new HLogKey(regionName, tableName, 10, time, + HConstants.DEFAULT_CLUSTER_ID), WALEdit.createCompaction(region.getRegionInfo(), + compactionDescriptor))); + writer.close(); + + // close the region now, and reopen again + region.getTableDesc(); + region.getRegionInfo(); + region.close(); + region = HRegion.openHRegion(region, null); + + // now check whether we have only one store file, the compacted one + Collection sfs = region.getStore(family).getStorefiles(); + for (StoreFile sf : sfs) { + LOG.info(sf.getPath()); + } + assertEquals(1, region.getStore(family).getStorefilesCount()); + files = FSUtils.listStatus(fs, tmpDir); + assertTrue("Expected to find 0 files inside " + tmpDir, files == null || files.length == 0); + + for (long i = minSeqId; i < maxSeqId; i++) { + Get get = new Get(Bytes.toBytes(i)); + Result result = region.get(get); + byte[] value = result.getValue(family, Bytes.toBytes(i)); + assertArrayEquals(Bytes.toBytes(i), value); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = 
null; + wals.close(); + } + } + + @Test + public void testFlushMarkers() throws Exception { + // tests that flush markers are written to WAL and handled at recovered edits + String method = name.getMethodName(); + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + Path logDir = TEST_UTIL.getDataTestDirOnTestFS(method + ".log"); + final Configuration walConf = new Configuration(TEST_UTIL.getConfiguration()); + FSUtils.setRootDir(walConf, logDir); + final WALFactory wals = new WALFactory(walConf, null, method); + final WAL wal = wals.getWAL(tableName.getName(), tableName.getNamespace()); + + this.region = initHRegion(tableName, HConstants.EMPTY_START_ROW, + HConstants.EMPTY_END_ROW, method, CONF, false, Durability.USE_DEFAULT, wal, family); + try { + Path regiondir = region.getRegionFileSystem().getRegionDir(); + FileSystem fs = region.getRegionFileSystem().getFileSystem(); + byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes(); + + long maxSeqId = 3; + long minSeqId = 0; + + for (long i = minSeqId; i < maxSeqId; i++) { + Put put = new Put(Bytes.toBytes(i)); + put.add(family, Bytes.toBytes(i), Bytes.toBytes(i)); + region.put(put); + region.flush(true,true); + assertEquals(i+1, region.getStore(family).getStorefilesCount()); + } + + // this will create a region with 3 files from flush + assertEquals(3, region.getStore(family).getStorefilesCount()); + List storeFiles = new ArrayList(3); + for (StoreFile sf : region.getStore(family).getStorefiles()) { + storeFiles.add(sf.getPath().getName()); + } + + // now verify that the flush markers are written + wal.shutdown(); + WAL.Reader reader = wals.createReader(fs, DefaultWALProvider.getCurrentFileName(wal), + TEST_UTIL.getConfiguration()); + try { + List flushDescriptors = new ArrayList(); + long lastFlushSeqId = -1; + while (true) { + WAL.Entry entry = reader.next(); + if (entry == null) { + break; + } + Cell cell = entry.getEdit().getCells().get(0); + if (WALEdit.isMetaEditFamily(cell)) { + FlushDescriptor flushDesc = WALEdit.getFlushDescriptor(cell); + assertNotNull(flushDesc); + assertArrayEquals(tableName.getName(), flushDesc.getTableName().toByteArray()); + if (flushDesc.getAction() == FlushDescriptor.FlushAction.START_FLUSH) { + assertTrue(flushDesc.getFlushSequenceNumber() > lastFlushSeqId); + } else if (flushDesc.getAction() == FlushDescriptor.FlushAction.COMMIT_FLUSH) { + assertTrue(flushDesc.getFlushSequenceNumber() == lastFlushSeqId); + } + lastFlushSeqId = flushDesc.getFlushSequenceNumber(); + assertArrayEquals(regionName, flushDesc.getEncodedRegionName().toByteArray()); + assertEquals(1, flushDesc.getStoreFlushesCount()); //only one store + FlushDescriptor.StoreFlushDescriptor storeFlushDesc = flushDesc.getStoreFlushes(0); + assertArrayEquals(family, storeFlushDesc.getFamilyName().toByteArray()); + assertEquals("family", storeFlushDesc.getStoreHomeDir()); + if (flushDesc.getAction() == FlushDescriptor.FlushAction.START_FLUSH) { + assertEquals(0, storeFlushDesc.getFlushOutputCount()); + } else { + assertEquals(1, storeFlushDesc.getFlushOutputCount()); //only one file from flush + assertTrue(storeFiles.contains(storeFlushDesc.getFlushOutput(0))); + } + + flushDescriptors.add(entry); + } + } + + assertEquals(3 * 2, flushDescriptors.size()); // START_FLUSH and COMMIT_FLUSH per flush + + // now write those markers to the recovered edits again. 
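+ // Reopening the region further down replays these recovered edits, exercising flush-marker handling on replay.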
+ + Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir); + + Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", 1000)); + fs.create(recoveredEdits); + WALProvider.Writer writer = wals.createRecoveredEditsWriter(fs, recoveredEdits); + + for (WAL.Entry entry : flushDescriptors) { + writer.append(entry); + } + writer.close(); + } finally { + if (null != reader) { + try { + reader.close(); + } catch (IOException exception) { + LOG.warn("Problem closing wal: " + exception.getMessage()); + LOG.debug("exception details", exception); + } + } + } + + + // close the region now, and reopen again + region.close(); + region = HRegion.openHRegion(region, null); + + // now check whether we have can read back the data from region + for (long i = minSeqId; i < maxSeqId; i++) { + Get get = new Get(Bytes.toBytes(i)); + Result result = region.get(get); + byte[] value = result.getValue(family, Bytes.toBytes(i)); + assertArrayEquals(Bytes.toBytes(i), value); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + wals.close(); + } + } + + class IsFlushWALMarker extends ArgumentMatcher { + volatile FlushAction[] actions; + public IsFlushWALMarker(FlushAction... actions) { + this.actions = actions; + } + @Override + public boolean matches(Object edit) { + List cells = ((WALEdit)edit).getCells(); + if (cells.isEmpty()) { + return false; + } + if (WALEdit.isMetaEditFamily(cells.get(0))) { + FlushDescriptor desc = null; + try { + desc = WALEdit.getFlushDescriptor(cells.get(0)); + } catch (IOException e) { + LOG.warn(e); + return false; + } + if (desc != null) { + for (FlushAction action : actions) { + if (desc.getAction() == action) { + return true; + } + } + } + } + return false; + } + public IsFlushWALMarker set(FlushAction... actions) { + this.actions = actions; + return this; + } + } + + @Test + @SuppressWarnings("unchecked") + public void testFlushMarkersWALFail() throws Exception { + // test the cases where the WAL append for flush markers fail. + String method = name.getMethodName(); + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + + // spy an actual WAL implementation to throw exception (was not able to mock) + Path logDir = TEST_UTIL.getDataTestDirOnTestFS(method + "log"); + + final Configuration walConf = new Configuration(TEST_UTIL.getConfiguration()); + FSUtils.setRootDir(walConf, logDir); + // Make up a WAL that we can manipulate at append time. + class FailAppendFlushMarkerWAL extends FSHLog { + volatile FlushAction [] flushActions = null; + + public FailAppendFlushMarkerWAL(FileSystem fs, Path root, String logDir, Configuration conf) + throws IOException { + super(fs, root, logDir, conf); + } + + @Override + protected Writer createWriterInstance(Path path) throws IOException { + final Writer w = super.createWriterInstance(path); + return new Writer() { + @Override + public void close() throws IOException { + w.close(); + } + + @Override + public void sync() throws IOException { + w.sync(); + } + + @Override + public void append(Entry entry) throws IOException { + List cells = entry.getEdit().getCells(); + if (WALEdit.isMetaEditFamily(cells.get(0))) { + FlushDescriptor desc = WALEdit.getFlushDescriptor(cells.get(0)); + if (desc != null) { + for (FlushAction flushAction: flushActions) { + if (desc.getAction().equals(flushAction)) { + throw new IOException("Failed to append flush marker! 
" + flushAction); + } + } + } + } + w.append(entry); + } + + @Override + public long getLength() throws IOException { + return w.getLength(); + } + }; + } + } + // final WALFactory wals = new WALFactory(walConf, null, method); +// WAL wal = spy(wals.getWAL(tableName.getName(), tableName.getNamespace())); + FailAppendFlushMarkerWAL wal = + new FailAppendFlushMarkerWAL(FileSystem.get(walConf), FSUtils.getRootDir(walConf), + getName(), walConf); + this.region = initHRegion(tableName, HConstants.EMPTY_START_ROW, + HConstants.EMPTY_END_ROW, method, CONF, false, Durability.USE_DEFAULT, wal, family); + try { + int i = 0; + Put put = new Put(Bytes.toBytes(i)); + put.setDurability(Durability.SKIP_WAL); // have to skip mocked wal + put.add(family, Bytes.toBytes(i), Bytes.toBytes(i)); + region.put(put); + + // 1. Test case where START_FLUSH throws exception + wal.flushActions = new FlushAction[] {FlushAction.START_FLUSH}; + + // start cache flush will throw exception + try { + region.flush(true,true); + fail("This should have thrown exception"); + } catch (DroppedSnapshotException unexpected) { + // this should not be a dropped snapshot exception. Meaning that RS will not abort + throw unexpected; + } catch (IOException expected) { + // expected + } + + // The WAL is hosed. It has failed an append and a sync. It has an exception stuck in it + // which it will keep returning until we roll the WAL to prevent any further appends going + // in or syncs succeeding on top of failed appends, a no-no. + wal.rollWriter(true); + + // 2. Test case where START_FLUSH succeeds but COMMIT_FLUSH will throw exception + wal.flushActions = new FlushAction[] {FlushAction.COMMIT_FLUSH}; + + try { + region.flush(true,true); + fail("This should have thrown exception"); + } catch (DroppedSnapshotException expected) { + // we expect this exception, since we were able to write the snapshot, but failed to + // write the flush marker to WAL + } catch (IOException unexpected) { + throw unexpected; + } + + region.close(); + // Roll WAL to clean out any exceptions stuck in it. See note above where we roll WAL. + wal.rollWriter(true); + this.region = initHRegion(tableName, HConstants.EMPTY_START_ROW, + HConstants.EMPTY_END_ROW, method, CONF, false, Durability.USE_DEFAULT, wal, family); + region.put(put); + + // 3. Test case where ABORT_FLUSH will throw exception. + // Even if ABORT_FLUSH throws exception, we should not fail with IOE, but continue with + // DroppedSnapshotException. 
Below COMMMIT_FLUSH will cause flush to abort + wal.flushActions = new FlushAction[] {FlushAction.COMMIT_FLUSH, FlushAction.ABORT_FLUSH}; + + try { + region.flush(true,true); + fail("This should have thrown exception"); + } catch (DroppedSnapshotException expected) { + // we expect this exception, since we were able to write the snapshot, but failed to + // write the flush marker to WAL + } catch (IOException unexpected) { + throw unexpected; + } + + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testGetWhileRegionClose() throws IOException { + TableName tableName = TableName.valueOf(name.getMethodName()); + Configuration hc = initSplit(); + int numRows = 100; + byte[][] families = { fam1, fam2, fam3 }; + + // Setting up region + String method = name.getMethodName(); + this.region = initHRegion(tableName, method, hc, families); + try { + // Put data in region + final int startRow = 100; + putData(startRow, numRows, qual1, families); + putData(startRow, numRows, qual2, families); + putData(startRow, numRows, qual3, families); + final AtomicBoolean done = new AtomicBoolean(false); + final AtomicInteger gets = new AtomicInteger(0); + GetTillDoneOrException[] threads = new GetTillDoneOrException[10]; + try { + // Set ten threads running concurrently getting from the region. + for (int i = 0; i < threads.length / 2; i++) { + threads[i] = new GetTillDoneOrException(i, Bytes.toBytes("" + startRow), done, gets); + threads[i].setDaemon(true); + threads[i].start(); + } + // Artificially make the condition by setting closing flag explicitly. + // I can't make the issue happen with a call to region.close(). + this.region.closing.set(true); + for (int i = threads.length / 2; i < threads.length; i++) { + threads[i] = new GetTillDoneOrException(i, Bytes.toBytes("" + startRow), done, gets); + threads[i].setDaemon(true); + threads[i].start(); + } + } finally { + if (this.region != null) { + HBaseTestingUtility.closeRegionAndWAL(this.region); + } + } + done.set(true); + for (GetTillDoneOrException t : threads) { + try { + t.join(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + if (t.e != null) { + LOG.info("Exception=" + t.e); + assertFalse("Found a NPE in " + t.getName(), t.e instanceof NullPointerException); + } + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + /* + * Thread that does get on single row until 'done' flag is flipped. If an + * exception causes us to fail, it records it. + */ + class GetTillDoneOrException extends Thread { + private final Get g; + private final AtomicBoolean done; + private final AtomicInteger count; + private Exception e; + + GetTillDoneOrException(final int i, final byte[] r, final AtomicBoolean d, final AtomicInteger c) { + super("getter." + i); + this.g = new Get(r); + this.done = d; + this.count = c; + } + + @Override + public void run() { + while (!this.done.get()) { + try { + assertTrue(region.get(g).size() > 0); + this.count.incrementAndGet(); + } catch (Exception e) { + this.e = e; + break; + } + } + } + } + + /* + * An involved filter test. Has multiple column families and deletes in mix. 
+ */ + @Test + public void testWeirdCacheBehaviour() throws Exception { + TableName TABLE = TableName.valueOf("testWeirdCacheBehaviour"); + byte[][] FAMILIES = new byte[][] { Bytes.toBytes("trans-blob"), Bytes.toBytes("trans-type"), + Bytes.toBytes("trans-date"), Bytes.toBytes("trans-tags"), Bytes.toBytes("trans-group") }; + this.region = initHRegion(TABLE, getName(), CONF, FAMILIES); + try { + String value = "this is the value"; + String value2 = "this is some other value"; + String keyPrefix1 = "prefix1"; + String keyPrefix2 = "prefix2"; + String keyPrefix3 = "prefix3"; + putRows(this.region, 3, value, keyPrefix1); + putRows(this.region, 3, value, keyPrefix2); + putRows(this.region, 3, value, keyPrefix3); + putRows(this.region, 3, value2, keyPrefix1); + putRows(this.region, 3, value2, keyPrefix2); + putRows(this.region, 3, value2, keyPrefix3); + System.out.println("Checking values for key: " + keyPrefix1); + assertEquals("Got back incorrect number of rows from scan", 3, + getNumberOfRows(keyPrefix1, value2, this.region)); + System.out.println("Checking values for key: " + keyPrefix2); + assertEquals("Got back incorrect number of rows from scan", 3, + getNumberOfRows(keyPrefix2, value2, this.region)); + System.out.println("Checking values for key: " + keyPrefix3); + assertEquals("Got back incorrect number of rows from scan", 3, + getNumberOfRows(keyPrefix3, value2, this.region)); + deleteColumns(this.region, value2, keyPrefix1); + deleteColumns(this.region, value2, keyPrefix2); + deleteColumns(this.region, value2, keyPrefix3); + System.out.println("Starting important checks....."); + assertEquals("Got back incorrect number of rows from scan: " + keyPrefix1, 0, + getNumberOfRows(keyPrefix1, value2, this.region)); + assertEquals("Got back incorrect number of rows from scan: " + keyPrefix2, 0, + getNumberOfRows(keyPrefix2, value2, this.region)); + assertEquals("Got back incorrect number of rows from scan: " + keyPrefix3, 0, + getNumberOfRows(keyPrefix3, value2, this.region)); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testAppendWithReadOnlyTable() throws Exception { + TableName TABLE = TableName.valueOf("readOnlyTable"); + this.region = initHRegion(TABLE, getName(), CONF, true, Bytes.toBytes("somefamily")); + boolean exceptionCaught = false; + Append append = new Append(Bytes.toBytes("somerow")); + append.setDurability(Durability.SKIP_WAL); + append.add(Bytes.toBytes("somefamily"), Bytes.toBytes("somequalifier"), + Bytes.toBytes("somevalue")); + try { + region.append(append); + } catch (IOException e) { + exceptionCaught = true; + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + assertTrue(exceptionCaught == true); + } + + @Test + public void testIncrWithReadOnlyTable() throws Exception { + TableName TABLE = TableName.valueOf("readOnlyTable"); + this.region = initHRegion(TABLE, getName(), CONF, true, Bytes.toBytes("somefamily")); + boolean exceptionCaught = false; + Increment inc = new Increment(Bytes.toBytes("somerow")); + inc.setDurability(Durability.SKIP_WAL); + inc.addColumn(Bytes.toBytes("somefamily"), Bytes.toBytes("somequalifier"), 1L); + try { + region.increment(inc); + } catch (IOException e) { + exceptionCaught = true; + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + assertTrue(exceptionCaught == true); + } + + private void deleteColumns(HRegion r, String value, String keyPrefix) throws IOException { + 
InternalScanner scanner = buildScanner(keyPrefix, value, r); + int count = 0; + boolean more = false; + List results = new ArrayList(); + do { + more = scanner.next(results); + if (results != null && !results.isEmpty()) + count++; + else + break; + Delete delete = new Delete(CellUtil.cloneRow(results.get(0))); + delete.deleteColumn(Bytes.toBytes("trans-tags"), Bytes.toBytes("qual2")); + r.delete(delete); + results.clear(); + } while (more); + assertEquals("Did not perform correct number of deletes", 3, count); + } + + private int getNumberOfRows(String keyPrefix, String value, HRegion r) throws Exception { + InternalScanner resultScanner = buildScanner(keyPrefix, value, r); + int numberOfResults = 0; + List results = new ArrayList(); + boolean more = false; + do { + more = resultScanner.next(results); + if (results != null && !results.isEmpty()) + numberOfResults++; + else + break; + for (Cell kv : results) { + System.out.println("kv=" + kv.toString() + ", " + Bytes.toString(CellUtil.cloneValue(kv))); + } + results.clear(); + } while (more); + return numberOfResults; + } + + private InternalScanner buildScanner(String keyPrefix, String value, HRegion r) + throws IOException { + // Defaults FilterList.Operator.MUST_PASS_ALL. + FilterList allFilters = new FilterList(); + allFilters.addFilter(new PrefixFilter(Bytes.toBytes(keyPrefix))); + // Only return rows where this column value exists in the row. + SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("trans-tags"), + Bytes.toBytes("qual2"), CompareFilter.CompareOp.EQUAL, Bytes.toBytes(value)); + filter.setFilterIfMissing(true); + allFilters.addFilter(filter); + Scan scan = new Scan(); + scan.addFamily(Bytes.toBytes("trans-blob")); + scan.addFamily(Bytes.toBytes("trans-type")); + scan.addFamily(Bytes.toBytes("trans-date")); + scan.addFamily(Bytes.toBytes("trans-tags")); + scan.addFamily(Bytes.toBytes("trans-group")); + scan.setFilter(allFilters); + return r.getScanner(scan); + } + + private void putRows(HRegion r, int numRows, String value, String key) throws IOException { + for (int i = 0; i < numRows; i++) { + String row = key + "_" + i/* UUID.randomUUID().toString() */; + System.out.println(String.format("Saving row: %s, with value %s", row, value)); + Put put = new Put(Bytes.toBytes(row)); + put.setDurability(Durability.SKIP_WAL); + put.add(Bytes.toBytes("trans-blob"), null, Bytes.toBytes("value for blob")); + put.add(Bytes.toBytes("trans-type"), null, Bytes.toBytes("statement")); + put.add(Bytes.toBytes("trans-date"), null, Bytes.toBytes("20090921010101999")); + put.add(Bytes.toBytes("trans-tags"), Bytes.toBytes("qual2"), Bytes.toBytes(value)); + put.add(Bytes.toBytes("trans-group"), null, Bytes.toBytes("adhocTransactionGroupId")); + r.put(put); + } + } + + @Test + public void testFamilyWithAndWithoutColon() throws Exception { + TableName b = TableName.valueOf(getName()); + byte[] cf = Bytes.toBytes(COLUMN_FAMILY); + this.region = initHRegion(b, getName(), CONF, cf); + try { + Put p = new Put(b.toBytes()); + byte[] cfwithcolon = Bytes.toBytes(COLUMN_FAMILY + ":"); + p.add(cfwithcolon, cfwithcolon, cfwithcolon); + boolean exception = false; + try { + this.region.put(p); + } catch (NoSuchColumnFamilyException e) { + exception = true; + } + assertTrue(exception); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testBatchPut_whileNoRowLocksHeld() throws IOException { + byte[] cf = Bytes.toBytes(COLUMN_FAMILY); + byte[] qual = 
Bytes.toBytes("qual"); + byte[] val = Bytes.toBytes("val"); + this.region = initHRegion(TableName.valueOf(getName()), getName(), CONF, cf); + MetricsWALSource source = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class); + try { + long syncs = metricsAssertHelper.getCounter("syncTimeNumOps", source); + metricsAssertHelper.assertCounter("syncTimeNumOps", syncs, source); + + LOG.info("First a batch put with all valid puts"); + final Put[] puts = new Put[10]; + for (int i = 0; i < 10; i++) { + puts[i] = new Put(Bytes.toBytes("row_" + i)); + puts[i].add(cf, qual, val); + } + + OperationStatus[] codes = this.region.batchMutate(puts); + assertEquals(10, codes.length); + for (int i = 0; i < 10; i++) { + assertEquals(HConstants.OperationStatusCode.SUCCESS, codes[i].getOperationStatusCode()); + } + metricsAssertHelper.assertCounter("syncTimeNumOps", syncs + 1, source); + + LOG.info("Next a batch put with one invalid family"); + puts[5].add(Bytes.toBytes("BAD_CF"), qual, val); + codes = this.region.batchMutate(puts); + assertEquals(10, codes.length); + for (int i = 0; i < 10; i++) { + assertEquals((i == 5) ? HConstants.OperationStatusCode.BAD_FAMILY : HConstants.OperationStatusCode.SUCCESS, + codes[i].getOperationStatusCode()); + } + + metricsAssertHelper.assertCounter("syncTimeNumOps", syncs + 2, source); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testBatchPut_whileMultipleRowLocksHeld() throws Exception { + byte[] cf = Bytes.toBytes(COLUMN_FAMILY); + byte[] qual = Bytes.toBytes("qual"); + byte[] val = Bytes.toBytes("val"); + this.region = initHRegion(TableName.valueOf(getName()), getName(), CONF, cf); + MetricsWALSource source = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class); + try { + long syncs = metricsAssertHelper.getCounter("syncTimeNumOps", source); + metricsAssertHelper.assertCounter("syncTimeNumOps", syncs, source); + + final Put[] puts = new Put[10]; + for (int i = 0; i < 10; i++) { + puts[i] = new Put(Bytes.toBytes("row_" + i)); + puts[i].add(cf, qual, val); + } + puts[5].add(Bytes.toBytes("BAD_CF"), qual, val); + + LOG.info("batchPut will have to break into four batches to avoid row locks"); + Region.RowLock rowLock1 = region.getRowLock(Bytes.toBytes("row_2")); + Region.RowLock rowLock2 = region.getRowLock(Bytes.toBytes("row_1")); + Region.RowLock rowLock3 = region.getRowLock(Bytes.toBytes("row_3")); + Region.RowLock rowLock4 = region.getRowLock(Bytes.toBytes("row_3"), true); + + + MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(CONF); + final AtomicReference retFromThread = new AtomicReference(); + final CountDownLatch startingPuts = new CountDownLatch(1); + final CountDownLatch startingClose = new CountDownLatch(1); + MultithreadedTestUtil.TestThread putter = new MultithreadedTestUtil.TestThread(ctx) { + @Override + public void doWork() throws IOException { + startingPuts.countDown(); + retFromThread.set(region.batchMutate(puts)); + } + }; + LOG.info("...starting put thread while holding locks"); + ctx.addThread(putter); + ctx.startThreads(); + + // Now attempt to close the region from another thread. Prior to HBASE-12565 + // this would cause the in-progress batchMutate operation to to fail with + // exception because it use to release and re-acquire the close-guard lock + // between batches. Caller then didn't get status indicating which writes succeeded. + // We now expect this thread to block until the batchMutate call finishes. 
+ Thread regionCloseThread = new MultithreadedTestUtil.TestThread(ctx) { + @Override + public void doWork() { + try { + startingPuts.await(); + // Give some time for the batch mutate to get in. + // We don't want to race with the mutate + Thread.sleep(10); + startingClose.countDown(); + HBaseTestingUtility.closeRegionAndWAL(region); + } catch (IOException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }; + regionCloseThread.start(); + + startingClose.await(); + startingPuts.await(); + Thread.sleep(100); + LOG.info("...releasing row lock 1, which should let put thread continue"); + rowLock1.release(); + rowLock2.release(); + rowLock3.release(); + waitForCounter(source, "syncTimeNumOps", syncs + 1); + + LOG.info("...joining on put thread"); + ctx.stop(); + regionCloseThread.join(); + + OperationStatus[] codes = retFromThread.get(); + for (int i = 0; i < codes.length; i++) { + assertEquals((i == 5) ? HConstants.OperationStatusCode.BAD_FAMILY : HConstants.OperationStatusCode.SUCCESS, + codes[i].getOperationStatusCode()); + } + rowLock4.release(); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + private void waitForCounter(MetricsWALSource source, String metricName, long expectedCount) + throws InterruptedException { + long startWait = System.currentTimeMillis(); + long currentCount; + while ((currentCount = metricsAssertHelper.getCounter(metricName, source)) < expectedCount) { + Thread.sleep(100); + if (System.currentTimeMillis() - startWait > 10000) { + fail(String.format("Timed out waiting for '%s' >= '%s', currentCount=%s", metricName, + expectedCount, currentCount)); + } + } + } + + @Test + public void testBatchPutWithTsSlop() throws Exception { + TableName b = TableName.valueOf(getName()); + byte[] cf = Bytes.toBytes(COLUMN_FAMILY); + byte[] qual = Bytes.toBytes("qual"); + byte[] val = Bytes.toBytes("val"); + + // add data with a timestamp that is too recent for range. 
Ensure assert + CONF.setInt("hbase.hregion.keyvalue.timestamp.slop.millisecs", 1000); + this.region = initHRegion(b, getName(), CONF, cf); + + try { + MetricsWALSource source = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class); + long syncs = metricsAssertHelper.getCounter("syncTimeNumOps", source); + metricsAssertHelper.assertCounter("syncTimeNumOps", syncs, source); + + final Put[] puts = new Put[10]; + for (int i = 0; i < 10; i++) { + puts[i] = new Put(Bytes.toBytes("row_" + i), Long.MAX_VALUE - 100); + puts[i].add(cf, qual, val); + } + + OperationStatus[] codes = this.region.batchMutate(puts); + assertEquals(10, codes.length); + for (int i = 0; i < 10; i++) { + assertEquals(HConstants.OperationStatusCode.SANITY_CHECK_FAILURE, codes[i].getOperationStatusCode()); + } + metricsAssertHelper.assertCounter("syncTimeNumOps", syncs, source); + + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + + } + + // //////////////////////////////////////////////////////////////////////////// + // checkAndMutate tests + // //////////////////////////////////////////////////////////////////////////// + @Test + public void testCheckAndMutate_WithEmptyRowValue() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] qf1 = Bytes.toBytes("qualifier"); + byte[] emptyVal = new byte[] {}; + byte[] val1 = Bytes.toBytes("value1"); + byte[] val2 = Bytes.toBytes("value2"); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + // Putting empty data in key + Put put = new Put(row1); + put.add(fam1, qf1, emptyVal); + + // checkAndPut with empty value + boolean res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator( + emptyVal), put, true); + assertTrue(res); + + // Putting data in key + put = new Put(row1); + put.add(fam1, qf1, val1); + + // checkAndPut with correct value + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(emptyVal), + put, true); + assertTrue(res); + + // not empty anymore + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(emptyVal), + put, true); + assertFalse(res); + + Delete delete = new Delete(row1); + delete.deleteColumn(fam1, qf1); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(emptyVal), + delete, true); + assertFalse(res); + + put = new Put(row1); + put.add(fam1, qf1, val2); + // checkAndPut with correct value + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(val1), + put, true); + assertTrue(res); + + // checkAndDelete with correct value + delete = new Delete(row1); + delete.deleteColumn(fam1, qf1); + delete.deleteColumn(fam1, qf1); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(val2), + delete, true); + assertTrue(res); + + delete = new Delete(row1); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(emptyVal), + delete, true); + assertTrue(res); + + // checkAndPut looking for a null value + put = new Put(row1); + put.add(fam1, qf1, val1); + + res = region + .checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new NullComparator(), put, true); + assertTrue(res); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + 
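
Before the next checkAndMutate tests, a compact recap of the comparison semantics the test above relies on may help. This fragment is illustrative only; it reuses the same region API and byte[] naming as the tests and adds no new calls:

  // An EQUAL check against an empty BinaryComparator passes while the cell is
  // missing or holds an empty value; once a non-empty value is written, the
  // same check fails, and NullComparator passes again only after the cell is
  // deleted.
  byte[] row1 = Bytes.toBytes("row1");
  byte[] fam1 = Bytes.toBytes("fam1");
  byte[] qf1 = Bytes.toBytes("qualifier");
  Put probe = new Put(row1);
  probe.add(fam1, qf1, Bytes.toBytes("probe"));
  boolean applied = region.checkAndMutate(row1, fam1, qf1,
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(new byte[] {}), probe, true);
  // 'applied' is true only while row1/fam1:qualifier is empty or absent.
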
@Test + public void testCheckAndMutate_WithWrongValue() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] qf1 = Bytes.toBytes("qualifier"); + byte[] val1 = Bytes.toBytes("value1"); + byte[] val2 = Bytes.toBytes("value2"); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + // Putting data in key + Put put = new Put(row1); + put.add(fam1, qf1, val1); + region.put(put); + + // checkAndPut with wrong value + boolean res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator( + val2), put, true); + assertEquals(false, res); + + // checkAndDelete with wrong value + Delete delete = new Delete(row1); + delete.deleteFamily(fam1); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(val2), + put, true); + assertEquals(false, res); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testCheckAndMutate_WithCorrectValue() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] qf1 = Bytes.toBytes("qualifier"); + byte[] val1 = Bytes.toBytes("value1"); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + // Putting data in key + Put put = new Put(row1); + put.add(fam1, qf1, val1); + region.put(put); + + // checkAndPut with correct value + boolean res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator( + val1), put, true); + assertEquals(true, res); + + // checkAndDelete with correct value + Delete delete = new Delete(row1); + delete.deleteColumn(fam1, qf1); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(val1), + delete, true); + assertEquals(true, res); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testCheckAndMutate_WithNonEqualCompareOp() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] qf1 = Bytes.toBytes("qualifier"); + byte[] val1 = Bytes.toBytes("value1"); + byte[] val2 = Bytes.toBytes("value2"); + byte[] val3 = Bytes.toBytes("value3"); + byte[] val4 = Bytes.toBytes("value4"); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + // Putting val3 in key + Put put = new Put(row1); + put.add(fam1, qf1, val3); + region.put(put); + + // Test CompareOp.LESS: original = val3, compare with val3, fail + boolean res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.LESS, + new BinaryComparator(val3), put, true); + assertEquals(false, res); + + // Test CompareOp.LESS: original = val3, compare with val4, fail + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.LESS, + new BinaryComparator(val4), put, true); + assertEquals(false, res); + + // Test CompareOp.LESS: original = val3, compare with val2, + // succeed (now value = val2) + put = new Put(row1); + put.add(fam1, qf1, val2); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.LESS, + new BinaryComparator(val2), put, true); + assertEquals(true, res); + + // Test CompareOp.LESS_OR_EQUAL: original = val2, compare with val3, fail + res = region.checkAndMutate(row1, fam1, qf1, 
CompareFilter.CompareOp.LESS_OR_EQUAL, + new BinaryComparator(val3), put, true); + assertEquals(false, res); + + // Test CompareOp.LESS_OR_EQUAL: original = val2, compare with val2, + // succeed (value still = val2) + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.LESS_OR_EQUAL, + new BinaryComparator(val2), put, true); + assertEquals(true, res); + + // Test CompareOp.LESS_OR_EQUAL: original = val2, compare with val1, + // succeed (now value = val3) + put = new Put(row1); + put.add(fam1, qf1, val3); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.LESS_OR_EQUAL, + new BinaryComparator(val1), put, true); + assertEquals(true, res); + + // Test CompareOp.GREATER: original = val3, compare with val3, fail + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.GREATER, + new BinaryComparator(val3), put, true); + assertEquals(false, res); + + // Test CompareOp.GREATER: original = val3, compare with val2, fail + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.GREATER, + new BinaryComparator(val2), put, true); + assertEquals(false, res); + + // Test CompareOp.GREATER: original = val3, compare with val4, + // succeed (now value = val2) + put = new Put(row1); + put.add(fam1, qf1, val2); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.GREATER, + new BinaryComparator(val4), put, true); + assertEquals(true, res); + + // Test CompareOp.GREATER_OR_EQUAL: original = val2, compare with val1, fail + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.GREATER_OR_EQUAL, + new BinaryComparator(val1), put, true); + assertEquals(false, res); + + // Test CompareOp.GREATER_OR_EQUAL: original = val2, compare with val2, + // succeed (value still = val2) + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.GREATER_OR_EQUAL, + new BinaryComparator(val2), put, true); + assertEquals(true, res); + + // Test CompareOp.GREATER_OR_EQUAL: original = val2, compare with val3, succeed + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.GREATER_OR_EQUAL, + new BinaryComparator(val3), put, true); + assertEquals(true, res); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testCheckAndPut_ThatPutWasWritten() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("fam2"); + byte[] qf1 = Bytes.toBytes("qualifier"); + byte[] val1 = Bytes.toBytes("value1"); + byte[] val2 = Bytes.toBytes("value2"); + + byte[][] families = { fam1, fam2 }; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + // Putting data in the key to check + Put put = new Put(row1); + put.add(fam1, qf1, val1); + region.put(put); + + // Creating put to add + long ts = System.currentTimeMillis(); + KeyValue kv = new KeyValue(row1, fam2, qf1, ts, KeyValue.Type.Put, val2); + put = new Put(row1); + put.add(kv); + + // checkAndPut with wrong value + boolean res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator( + val1), put, true); + assertEquals(true, res); + + Get get = new Get(row1); + get.addColumn(fam2, qf1); + Cell[] actual = region.get(get).rawCells(); + + Cell[] expected = { kv }; + + assertEquals(expected.length, actual.length); + for (int i = 0; i < actual.length; i++) { + assertEquals(expected[i], actual[i]); + } + } finally { + 
HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testCheckAndPut_wrongRowInPut() throws IOException { + TableName tableName = TableName.valueOf(name.getMethodName()); + this.region = initHRegion(tableName, this.getName(), CONF, COLUMNS); + try { + Put put = new Put(row2); + put.add(fam1, qual1, value1); + try { + region.checkAndMutate(row, fam1, qual1, CompareFilter.CompareOp.EQUAL, + new BinaryComparator(value2), put, false); + fail(); + } catch (org.apache.hadoop.hbase.DoNotRetryIOException expected) { + // expected exception. + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testCheckAndDelete_ThatDeleteWasWritten() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("fam2"); + byte[] qf1 = Bytes.toBytes("qualifier1"); + byte[] qf2 = Bytes.toBytes("qualifier2"); + byte[] qf3 = Bytes.toBytes("qualifier3"); + byte[] val1 = Bytes.toBytes("value1"); + byte[] val2 = Bytes.toBytes("value2"); + byte[] val3 = Bytes.toBytes("value3"); + byte[] emptyVal = new byte[] {}; + + byte[][] families = { fam1, fam2 }; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + // Put content + Put put = new Put(row1); + put.add(fam1, qf1, val1); + region.put(put); + Threads.sleep(2); + + put = new Put(row1); + put.add(fam1, qf1, val2); + put.add(fam2, qf1, val3); + put.add(fam2, qf2, val2); + put.add(fam2, qf3, val1); + put.add(fam1, qf3, val1); + region.put(put); + + // Multi-column delete + Delete delete = new Delete(row1); + delete.deleteColumn(fam1, qf1); + delete.deleteColumn(fam2, qf1); + delete.deleteColumn(fam1, qf3); + boolean res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator( + val2), delete, true); + assertEquals(true, res); + + Get get = new Get(row1); + get.addColumn(fam1, qf1); + get.addColumn(fam1, qf3); + get.addColumn(fam2, qf2); + Result r = region.get(get); + assertEquals(2, r.size()); + assertArrayEquals(val1, r.getValue(fam1, qf1)); + assertArrayEquals(val2, r.getValue(fam2, qf2)); + + // Family delete + delete = new Delete(row1); + delete.deleteFamily(fam2); + res = region.checkAndMutate(row1, fam2, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(emptyVal), + delete, true); + assertEquals(true, res); + + get = new Get(row1); + r = region.get(get); + assertEquals(1, r.size()); + assertArrayEquals(val1, r.getValue(fam1, qf1)); + + // Row delete + delete = new Delete(row1); + res = region.checkAndMutate(row1, fam1, qf1, CompareFilter.CompareOp.EQUAL, new BinaryComparator(val1), + delete, true); + assertEquals(true, res); + get = new Get(row1); + r = region.get(get); + assertEquals(0, r.size()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + // //////////////////////////////////////////////////////////////////////////// + // Delete tests + // //////////////////////////////////////////////////////////////////////////// + @Test + public void testDelete_multiDeleteColumn() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] qual = Bytes.toBytes("qualifier"); + byte[] value = Bytes.toBytes("value"); + + Put put = new Put(row1); + put.add(fam1, qual, 1, value); + put.add(fam1, qual, 2, value); + + String method = this.getName(); + this.region = 
initHRegion(tableName, method, CONF, fam1); + try { + region.put(put); + + // We do support deleting more than 1 'latest' version + Delete delete = new Delete(row1); + delete.deleteColumn(fam1, qual); + delete.deleteColumn(fam1, qual); + region.delete(delete); + + Get get = new Get(row1); + get.addFamily(fam1); + Result r = region.get(get); + assertEquals(0, r.size()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testDelete_CheckFamily() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("fam2"); + byte[] fam3 = Bytes.toBytes("fam3"); + byte[] fam4 = Bytes.toBytes("fam4"); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1, fam2, fam3); + try { + List kvs = new ArrayList(); + kvs.add(new KeyValue(row1, fam4, null, null)); + + // testing existing family + byte[] family = fam2; + try { + NavigableMap> deleteMap = new TreeMap>( + Bytes.BYTES_COMPARATOR); + deleteMap.put(family, kvs); + region.delete(deleteMap, Durability.SYNC_WAL); + } catch (Exception e) { + assertTrue("Family " + new String(family) + " does not exist", false); + } + + // testing non existing family + boolean ok = false; + family = fam4; + try { + NavigableMap> deleteMap = new TreeMap>( + Bytes.BYTES_COMPARATOR); + deleteMap.put(family, kvs); + region.delete(deleteMap, Durability.SYNC_WAL); + } catch (Exception e) { + ok = true; + } + assertEquals("Family " + new String(family) + " does exist", true, ok); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testDelete_mixed() throws IOException, InterruptedException { + byte[] fam = Bytes.toBytes("info"); + byte[][] families = { fam }; + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + EnvironmentEdgeManagerTestHelper.injectEdge(new IncrementingEnvironmentEdge()); + + byte[] row = Bytes.toBytes("table_name"); + // column names + byte[] serverinfo = Bytes.toBytes("serverinfo"); + byte[] splitA = Bytes.toBytes("splitA"); + byte[] splitB = Bytes.toBytes("splitB"); + + // add some data: + Put put = new Put(row); + put.add(fam, splitA, Bytes.toBytes("reference_A")); + region.put(put); + + put = new Put(row); + put.add(fam, splitB, Bytes.toBytes("reference_B")); + region.put(put); + + put = new Put(row); + put.add(fam, serverinfo, Bytes.toBytes("ip_address")); + region.put(put); + + // ok now delete a split: + Delete delete = new Delete(row); + delete.deleteColumns(fam, splitA); + region.delete(delete); + + // assert some things: + Get get = new Get(row).addColumn(fam, serverinfo); + Result result = region.get(get); + assertEquals(1, result.size()); + + get = new Get(row).addColumn(fam, splitA); + result = region.get(get); + assertEquals(0, result.size()); + + get = new Get(row).addColumn(fam, splitB); + result = region.get(get); + assertEquals(1, result.size()); + + // Assert that after a delete, I can put. + put = new Put(row); + put.add(fam, splitA, Bytes.toBytes("reference_A")); + region.put(put); + get = new Get(row); + result = region.get(get); + assertEquals(3, result.size()); + + // Now delete all... 
then test I can add stuff back + delete = new Delete(row); + region.delete(delete); + assertEquals(0, region.get(get).size()); + + region.put(new Put(row).add(fam, splitA, Bytes.toBytes("reference_A"))); + result = region.get(get); + assertEquals(1, result.size()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testDeleteRowWithFutureTs() throws IOException { + byte[] fam = Bytes.toBytes("info"); + byte[][] families = { fam }; + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + byte[] row = Bytes.toBytes("table_name"); + // column names + byte[] serverinfo = Bytes.toBytes("serverinfo"); + + // add data in the far future + Put put = new Put(row); + put.add(fam, serverinfo, HConstants.LATEST_TIMESTAMP - 5, Bytes.toBytes("value")); + region.put(put); + + // now delete something in the present + Delete delete = new Delete(row); + region.delete(delete); + + // make sure we still see our data + Get get = new Get(row).addColumn(fam, serverinfo); + Result result = region.get(get); + assertEquals(1, result.size()); + + // delete the future row + delete = new Delete(row, HConstants.LATEST_TIMESTAMP - 3); + region.delete(delete); + + // make sure it is gone + get = new Get(row).addColumn(fam, serverinfo); + result = region.get(get); + assertEquals(0, result.size()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + /** + * Tests that the special LATEST_TIMESTAMP option for puts gets replaced by + * the actual timestamp + */ + @Test + public void testPutWithLatestTS() throws IOException { + byte[] fam = Bytes.toBytes("info"); + byte[][] families = { fam }; + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + byte[] row = Bytes.toBytes("row1"); + // column names + byte[] qual = Bytes.toBytes("qual"); + + // add data with LATEST_TIMESTAMP, put without WAL + Put put = new Put(row); + put.add(fam, qual, HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value")); + region.put(put); + + // Make sure it shows up with an actual timestamp + Get get = new Get(row).addColumn(fam, qual); + Result result = region.get(get); + assertEquals(1, result.size()); + Cell kv = result.rawCells()[0]; + LOG.info("Got: " + kv); + assertTrue("LATEST_TIMESTAMP was not replaced with real timestamp", + kv.getTimestamp() != HConstants.LATEST_TIMESTAMP); + + // Check same with WAL enabled (historically these took different + // code paths, so check both) + row = Bytes.toBytes("row2"); + put = new Put(row); + put.add(fam, qual, HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value")); + region.put(put); + + // Make sure it shows up with an actual timestamp + get = new Get(row).addColumn(fam, qual); + result = region.get(get); + assertEquals(1, result.size()); + kv = result.rawCells()[0]; + LOG.info("Got: " + kv); + assertTrue("LATEST_TIMESTAMP was not replaced with real timestamp", + kv.getTimestamp() != HConstants.LATEST_TIMESTAMP); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + + } + + /** + * Tests that there is server-side filtering for invalid timestamp upper + * bound. Note that the timestamp lower bound is automatically handled for us + * by the TTL field. 
+ */ + @Test + public void testPutWithTsSlop() throws IOException { + byte[] fam = Bytes.toBytes("info"); + byte[][] families = { fam }; + String method = this.getName(); + + // add data with a timestamp that is too recent for range. Ensure assert + CONF.setInt("hbase.hregion.keyvalue.timestamp.slop.millisecs", 1000); + this.region = initHRegion(tableName, method, CONF, families); + boolean caughtExcep = false; + try { + try { + // no TS specified == use latest. should not error + region.put(new Put(row).add(fam, Bytes.toBytes("qual"), Bytes.toBytes("value"))); + // TS out of range. should error + region.put(new Put(row).add(fam, Bytes.toBytes("qual"), System.currentTimeMillis() + 2000, + Bytes.toBytes("value"))); + fail("Expected IOE for TS out of configured timerange"); + } catch (FailedSanityCheckException ioe) { + LOG.debug("Received expected exception", ioe); + caughtExcep = true; + } + assertTrue("Should catch FailedSanityCheckException", caughtExcep); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testScanner_DeleteOneFamilyNotAnother() throws IOException { + byte[] fam1 = Bytes.toBytes("columnA"); + byte[] fam2 = Bytes.toBytes("columnB"); + this.region = initHRegion(tableName, getName(), CONF, fam1, fam2); + try { + byte[] rowA = Bytes.toBytes("rowA"); + byte[] rowB = Bytes.toBytes("rowB"); + + byte[] value = Bytes.toBytes("value"); + + Delete delete = new Delete(rowA); + delete.deleteFamily(fam1); + + region.delete(delete); + + // now create data. + Put put = new Put(rowA); + put.add(fam2, null, value); + region.put(put); + + put = new Put(rowB); + put.add(fam1, null, value); + put.add(fam2, null, value); + region.put(put); + + Scan scan = new Scan(); + scan.addFamily(fam1).addFamily(fam2); + InternalScanner s = region.getScanner(scan); + List results = new ArrayList(); + s.next(results); + assertTrue(CellUtil.matchingRow(results.get(0), rowA)); + + results.clear(); + s.next(results); + assertTrue(CellUtil.matchingRow(results.get(0), rowB)); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testDeleteColumns_PostInsert() throws IOException, InterruptedException { + Delete delete = new Delete(row); + delete.deleteColumns(fam1, qual1); + doTestDelete_AndPostInsert(delete); + } + + @Test + public void testDeleteFamily_PostInsert() throws IOException, InterruptedException { + Delete delete = new Delete(row); + delete.deleteFamily(fam1); + doTestDelete_AndPostInsert(delete); + } + + public void doTestDelete_AndPostInsert(Delete delete) throws IOException, InterruptedException { + TableName tableName = TableName.valueOf(name.getMethodName()); + this.region = initHRegion(tableName, getName(), CONF, fam1); + try { + EnvironmentEdgeManagerTestHelper.injectEdge(new IncrementingEnvironmentEdge()); + Put put = new Put(row); + put.add(fam1, qual1, value1); + region.put(put); + + // now delete the value: + region.delete(delete); + + // ok put data: + put = new Put(row); + put.add(fam1, qual1, value2); + region.put(put); + + // ok get: + Get get = new Get(row); + get.addColumn(fam1, qual1); + + Result r = region.get(get); + assertEquals(1, r.size()); + assertArrayEquals(value2, r.getValue(fam1, qual1)); + + // next: + Scan scan = new Scan(row); + scan.addColumn(fam1, qual1); + InternalScanner s = region.getScanner(scan); + + List results = new ArrayList(); + assertEquals(false, s.next(results)); + assertEquals(1, results.size()); + Cell kv = 
results.get(0); + + assertArrayEquals(value2, CellUtil.cloneValue(kv)); + assertArrayEquals(fam1, CellUtil.cloneFamily(kv)); + assertArrayEquals(qual1, CellUtil.cloneQualifier(kv)); + assertArrayEquals(row, CellUtil.cloneRow(kv)); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testDelete_CheckTimestampUpdated() throws IOException { + TableName tableName = TableName.valueOf(name.getMethodName()); + byte[] row1 = Bytes.toBytes("row1"); + byte[] col1 = Bytes.toBytes("col1"); + byte[] col2 = Bytes.toBytes("col2"); + byte[] col3 = Bytes.toBytes("col3"); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + // Building checkerList + List kvs = new ArrayList(); + kvs.add(new KeyValue(row1, fam1, col1, null)); + kvs.add(new KeyValue(row1, fam1, col2, null)); + kvs.add(new KeyValue(row1, fam1, col3, null)); + + NavigableMap> deleteMap = new TreeMap>( + Bytes.BYTES_COMPARATOR); + deleteMap.put(fam1, kvs); + region.delete(deleteMap, Durability.SYNC_WAL); + + // extract the key values out the memstore: + // This is kinda hacky, but better than nothing... + long now = System.currentTimeMillis(); + AbstractMemStore memstore = ((HStore) region.getStore(fam1)).memstore; + Cell firstCell = ((HStore) region.getStore(fam1)).memstore.getActive().first(); + assertTrue(firstCell.getTimestamp() <= now); + now = firstCell.getTimestamp(); + for (Cell cell : memstore.getActive().getCellSet()) { + assertTrue(cell.getTimestamp() <= now); + now = cell.getTimestamp(); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + // //////////////////////////////////////////////////////////////////////////// + // Get tests + // //////////////////////////////////////////////////////////////////////////// + @Test + public void testGet_FamilyChecker() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("False"); + byte[] col1 = Bytes.toBytes("col1"); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + Get get = new Get(row1); + get.addColumn(fam2, col1); + + // Test + try { + region.get(get); + } catch (org.apache.hadoop.hbase.DoNotRetryIOException e) { + assertFalse(false); + return; + } + assertFalse(true); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testGet_Basic() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] col1 = Bytes.toBytes("col1"); + byte[] col2 = Bytes.toBytes("col2"); + byte[] col3 = Bytes.toBytes("col3"); + byte[] col4 = Bytes.toBytes("col4"); + byte[] col5 = Bytes.toBytes("col5"); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + // Add to memstore + Put put = new Put(row1); + put.add(fam1, col1, null); + put.add(fam1, col2, null); + put.add(fam1, col3, null); + put.add(fam1, col4, null); + put.add(fam1, col5, null); + region.put(put); + + Get get = new Get(row1); + get.addColumn(fam1, col2); + get.addColumn(fam1, col4); + // Expected result + KeyValue kv1 = new KeyValue(row1, fam1, col2); + KeyValue kv2 = new KeyValue(row1, fam1, col4); + KeyValue[] expected = { kv1, kv2 }; + + // Test + Result res = region.get(get); + assertEquals(expected.length, 
res.size()); + for (int i = 0; i < res.size(); i++) { + assertTrue(CellUtil.matchingRow(expected[i], res.rawCells()[i])); + assertTrue(CellUtil.matchingFamily(expected[i], res.rawCells()[i])); + assertTrue(CellUtil.matchingQualifier(expected[i], res.rawCells()[i])); + } + + // Test using a filter on a Get + Get g = new Get(row1); + final int count = 2; + g.setFilter(new ColumnCountGetFilter(count)); + res = region.get(g); + assertEquals(count, res.size()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testGet_Empty() throws IOException { + byte[] row = Bytes.toBytes("row"); + byte[] fam = Bytes.toBytes("fam"); + + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam); + try { + Get get = new Get(row); + get.addFamily(fam); + Result r = region.get(get); + + assertTrue(r.isEmpty()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + // //////////////////////////////////////////////////////////////////////////// + // Merge test + // //////////////////////////////////////////////////////////////////////////// + @Test + public void testMerge() throws IOException { + byte[][] families = { fam1, fam2, fam3 }; + Configuration hc = initSplit(); + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, hc, families); + try { + LOG.info("" + HBaseTestCase.addContent(region, fam3)); + region.flush(true); + region.compactStores(); + byte[] splitRow = region.checkSplit(); + assertNotNull(splitRow); + LOG.info("SplitRow: " + Bytes.toString(splitRow)); + HRegion[] subregions = splitRegion(region, splitRow); + try { + // Need to open the regions. + for (int i = 0; i < subregions.length; i++) { + HRegion.openHRegion(subregions[i], null); + subregions[i].compactStores(); + } + Path oldRegionPath = region.getRegionFileSystem().getRegionDir(); + Path oldRegion1 = subregions[0].getRegionFileSystem().getRegionDir(); + Path oldRegion2 = subregions[1].getRegionFileSystem().getRegionDir(); + long startTime = System.currentTimeMillis(); + region = HRegion.mergeAdjacent(subregions[0], subregions[1]); + LOG.info("Merge regions elapsed time: " + + ((System.currentTimeMillis() - startTime) / 1000.0)); + FILESYSTEM.delete(oldRegion1, true); + FILESYSTEM.delete(oldRegion2, true); + FILESYSTEM.delete(oldRegionPath, true); + LOG.info("splitAndMerge completed."); + } finally { + for (int i = 0; i < subregions.length; i++) { + try { + HBaseTestingUtility.closeRegionAndWAL(subregions[i]); + } catch (IOException e) { + // Ignore. + } + } + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + /** + * @param parent + * Region to split. + * @param midkey + * Key to split around. + * @return The Regions we created. + * @throws IOException + */ + HRegion[] splitRegion(final HRegion parent, final byte[] midkey) throws IOException { + PairOfSameType result = null; + SplitTransactionImpl st = new SplitTransactionImpl(parent, midkey); + // If prepare does not return true, for some reason -- logged inside in + // the prepare call -- we are not ready to split just now. Just return. 
+ if (!st.prepare()) { + parent.clearSplit(); + return null; + } + try { + result = st.execute(null, null); + } catch (IOException ioe) { + try { + LOG.info("Running rollback of failed split of " + + parent.getRegionInfo().getRegionNameAsString() + "; " + ioe.getMessage()); + st.rollback(null, null); + LOG.info("Successful rollback of failed split of " + + parent.getRegionInfo().getRegionNameAsString()); + return null; + } catch (RuntimeException e) { + // If failed rollback, kill this server to avoid having a hole in table. + LOG.info("Failed rollback of failed split of " + + parent.getRegionInfo().getRegionNameAsString() + " -- aborting server", e); + } + } + finally { + parent.clearSplit(); + } + return new HRegion[] { (HRegion)result.getFirst(), (HRegion)result.getSecond() }; + } + + // //////////////////////////////////////////////////////////////////////////// + // Scanner tests + // //////////////////////////////////////////////////////////////////////////// + @Test + public void testGetScanner_WithOkFamilies() throws IOException { + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("fam2"); + + byte[][] families = { fam1, fam2 }; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + Scan scan = new Scan(); + scan.addFamily(fam1); + scan.addFamily(fam2); + try { + region.getScanner(scan); + } catch (Exception e) { + assertTrue("Families could not be found in Region", false); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testGetScanner_WithNotOkFamilies() throws IOException { + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("fam2"); + + byte[][] families = { fam1 }; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + Scan scan = new Scan(); + scan.addFamily(fam2); + boolean ok = false; + try { + region.getScanner(scan); + } catch (Exception e) { + ok = true; + } + assertTrue("Families could not be found in Region", ok); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testGetScanner_WithNoFamilies() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("fam2"); + byte[] fam3 = Bytes.toBytes("fam3"); + byte[] fam4 = Bytes.toBytes("fam4"); + + byte[][] families = { fam1, fam2, fam3, fam4 }; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + + // Putting data in Region + Put put = new Put(row1); + put.add(fam1, null, null); + put.add(fam2, null, null); + put.add(fam3, null, null); + put.add(fam4, null, null); + region.put(put); + + Scan scan = null; + HRegion.RegionScannerImpl is = null; + + // Testing to see how many scanners that is produced by getScanner, + // starting + // with known number, 2 - current = 1 + scan = new Scan(); + scan.addFamily(fam2); + scan.addFamily(fam4); + is = (HRegion.RegionScannerImpl) region.getScanner(scan); + assertEquals(1, ((HRegion.RegionScannerImpl) is).storeHeap.getHeap().size()); + + scan = new Scan(); + is = (HRegion.RegionScannerImpl) region.getScanner(scan); + assertEquals(families.length - 1, ((HRegion.RegionScannerImpl) is).storeHeap.getHeap().size()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } 
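
Several of the scanner tests below consume an InternalScanner row by row; for readers unfamiliar with that contract, here is the typical consumption pattern, shown as an illustrative fragment using only the calls already exercised in these tests (next() fills the list with one row's cells and returns whether more rows remain):

  // Illustrative scanner loop: one call to next() per row, clear the buffer
  // between rows, and close the scanner when done.
  Scan scan = new Scan();
  scan.addFamily(Bytes.toBytes("fam1"));
  InternalScanner scanner = region.getScanner(scan);
  List<Cell> results = new ArrayList<Cell>();
  boolean more;
  do {
    more = scanner.next(results);
    // process one row's worth of cells here
    results.clear();
  } while (more);
  scanner.close();
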
+ + /** + * This method tests https://issues.apache.org/jira/browse/HBASE-2516. + * + * @throws IOException + */ + @Test + public void testGetScanner_WithRegionClosed() throws IOException { + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("fam2"); + + byte[][] families = { fam1, fam2 }; + + // Setting up region + String method = this.getName(); + try { + this.region = initHRegion(tableName, method, CONF, families); + } catch (IOException e) { + e.printStackTrace(); + fail("Got IOException during initHRegion, " + e.getMessage()); + } + try { + region.closed.set(true); + try { + region.getScanner(null); + fail("Expected to get an exception during getScanner on a region that is closed"); + } catch (NotServingRegionException e) { + // this is the correct exception that is expected + } catch (IOException e) { + fail("Got wrong type of exception - should be a NotServingRegionException, but was an IOException: " + + e.getMessage()); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testRegionScanner_Next() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] row2 = Bytes.toBytes("row2"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] fam2 = Bytes.toBytes("fam2"); + byte[] fam3 = Bytes.toBytes("fam3"); + byte[] fam4 = Bytes.toBytes("fam4"); + + byte[][] families = { fam1, fam2, fam3, fam4 }; + long ts = System.currentTimeMillis(); + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + // Putting data in Region + Put put = null; + put = new Put(row1); + put.add(fam1, (byte[]) null, ts, null); + put.add(fam2, (byte[]) null, ts, null); + put.add(fam3, (byte[]) null, ts, null); + put.add(fam4, (byte[]) null, ts, null); + region.put(put); + + put = new Put(row2); + put.add(fam1, (byte[]) null, ts, null); + put.add(fam2, (byte[]) null, ts, null); + put.add(fam3, (byte[]) null, ts, null); + put.add(fam4, (byte[]) null, ts, null); + region.put(put); + + Scan scan = new Scan(); + scan.addFamily(fam2); + scan.addFamily(fam4); + InternalScanner is = region.getScanner(scan); + + List res = null; + + // Result 1 + List expected1 = new ArrayList(); + expected1.add(new KeyValue(row1, fam2, null, ts, KeyValue.Type.Put, null)); + expected1.add(new KeyValue(row1, fam4, null, ts, KeyValue.Type.Put, null)); + + res = new ArrayList(); + is.next(res); + for (int i = 0; i < res.size(); i++) { + assertTrue(CellUtil.equalsIgnoreMvccVersion(expected1.get(i), res.get(i))); + } + + // Result 2 + List expected2 = new ArrayList(); + expected2.add(new KeyValue(row2, fam2, null, ts, KeyValue.Type.Put, null)); + expected2.add(new KeyValue(row2, fam4, null, ts, KeyValue.Type.Put, null)); + + res = new ArrayList(); + is.next(res); + for (int i = 0; i < res.size(); i++) { + assertTrue(CellUtil.equalsIgnoreMvccVersion(expected2.get(i), res.get(i))); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testScanner_ExplicitColumns_FromMemStore_EnforceVersions() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] qf1 = Bytes.toBytes("qualifier1"); + byte[] qf2 = Bytes.toBytes("qualifier2"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[][] families = { fam1 }; + + long ts1 = System.currentTimeMillis(); + long ts2 = ts1 + 1; + long ts3 = ts1 + 2; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, 
families); + try { + // Putting data in Region + Put put = null; + KeyValue kv13 = new KeyValue(row1, fam1, qf1, ts3, KeyValue.Type.Put, null); + KeyValue kv12 = new KeyValue(row1, fam1, qf1, ts2, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(row1, fam1, qf1, ts1, KeyValue.Type.Put, null); + + KeyValue kv23 = new KeyValue(row1, fam1, qf2, ts3, KeyValue.Type.Put, null); + KeyValue kv22 = new KeyValue(row1, fam1, qf2, ts2, KeyValue.Type.Put, null); + KeyValue kv21 = new KeyValue(row1, fam1, qf2, ts1, KeyValue.Type.Put, null); + + put = new Put(row1); + put.add(kv13); + put.add(kv12); + put.add(kv11); + put.add(kv23); + put.add(kv22); + put.add(kv21); + region.put(put); + + // Expected + List expected = new ArrayList(); + expected.add(kv13); + expected.add(kv12); + + Scan scan = new Scan(row1); + scan.addColumn(fam1, qf1); + scan.setMaxVersions(MAX_VERSIONS); + List actual = new ArrayList(); + InternalScanner scanner = region.getScanner(scan); + + boolean hasNext = scanner.next(actual); + assertEquals(false, hasNext); + + // Verify result + for (int i = 0; i < expected.size(); i++) { + assertEquals(expected.get(i), actual.get(i)); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testScanner_ExplicitColumns_FromFilesOnly_EnforceVersions() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] qf1 = Bytes.toBytes("qualifier1"); + byte[] qf2 = Bytes.toBytes("qualifier2"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[][] families = { fam1 }; + + long ts1 = 1; // System.currentTimeMillis(); + long ts2 = ts1 + 1; + long ts3 = ts1 + 2; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + // Putting data in Region + Put put = null; + KeyValue kv13 = new KeyValue(row1, fam1, qf1, ts3, KeyValue.Type.Put, null); + KeyValue kv12 = new KeyValue(row1, fam1, qf1, ts2, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(row1, fam1, qf1, ts1, KeyValue.Type.Put, null); + + KeyValue kv23 = new KeyValue(row1, fam1, qf2, ts3, KeyValue.Type.Put, null); + KeyValue kv22 = new KeyValue(row1, fam1, qf2, ts2, KeyValue.Type.Put, null); + KeyValue kv21 = new KeyValue(row1, fam1, qf2, ts1, KeyValue.Type.Put, null); + + put = new Put(row1); + put.add(kv13); + put.add(kv12); + put.add(kv11); + put.add(kv23); + put.add(kv22); + put.add(kv21); + region.put(put); + region.flush(true); + + // Expected + List expected = new ArrayList(); + expected.add(kv13); + expected.add(kv12); + expected.add(kv23); + expected.add(kv22); + + Scan scan = new Scan(row1); + scan.addColumn(fam1, qf1); + scan.addColumn(fam1, qf2); + scan.setMaxVersions(MAX_VERSIONS); + List actual = new ArrayList(); + InternalScanner scanner = region.getScanner(scan); + + boolean hasNext = scanner.next(actual); + assertEquals(false, hasNext); + + // Verify result + for (int i = 0; i < expected.size(); i++) { + assertTrue(CellUtil.equalsIgnoreMvccVersion(expected.get(i), actual.get(i))); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testScanner_ExplicitColumns_FromMemStoreAndFiles_EnforceVersions() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[][] families = { fam1 }; + byte[] qf1 = Bytes.toBytes("qualifier1"); + byte[] qf2 = Bytes.toBytes("qualifier2"); + + long ts1 = 1; + long ts2 = ts1 + 1; + long ts3 = ts1 + 2; + long ts4 = ts1 + 3; + + // 
Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + // Putting data in Region + KeyValue kv14 = new KeyValue(row1, fam1, qf1, ts4, KeyValue.Type.Put, null); + KeyValue kv13 = new KeyValue(row1, fam1, qf1, ts3, KeyValue.Type.Put, null); + KeyValue kv12 = new KeyValue(row1, fam1, qf1, ts2, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(row1, fam1, qf1, ts1, KeyValue.Type.Put, null); + + KeyValue kv24 = new KeyValue(row1, fam1, qf2, ts4, KeyValue.Type.Put, null); + KeyValue kv23 = new KeyValue(row1, fam1, qf2, ts3, KeyValue.Type.Put, null); + KeyValue kv22 = new KeyValue(row1, fam1, qf2, ts2, KeyValue.Type.Put, null); + KeyValue kv21 = new KeyValue(row1, fam1, qf2, ts1, KeyValue.Type.Put, null); + + Put put = null; + put = new Put(row1); + put.add(kv14); + put.add(kv24); + region.put(put); + region.flush(true); + + put = new Put(row1); + put.add(kv23); + put.add(kv13); + region.put(put); + region.flush(true); + + put = new Put(row1); + put.add(kv22); + put.add(kv12); + region.put(put); + region.flush(true); + + put = new Put(row1); + put.add(kv21); + put.add(kv11); + region.put(put); + + // Expected + List expected = new ArrayList(); + expected.add(kv14); + expected.add(kv13); + expected.add(kv12); + expected.add(kv24); + expected.add(kv23); + expected.add(kv22); + + Scan scan = new Scan(row1); + scan.addColumn(fam1, qf1); + scan.addColumn(fam1, qf2); + int versions = 3; + scan.setMaxVersions(versions); + List actual = new ArrayList(); + InternalScanner scanner = region.getScanner(scan); + + boolean hasNext = scanner.next(actual); + assertEquals(false, hasNext); + + // Verify result + for (int i = 0; i < expected.size(); i++) { + assertTrue(CellUtil.equalsIgnoreMvccVersion(expected.get(i), actual.get(i))); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testScanner_Wildcard_FromMemStore_EnforceVersions() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] qf1 = Bytes.toBytes("qualifier1"); + byte[] qf2 = Bytes.toBytes("qualifier2"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[][] families = { fam1 }; + + long ts1 = System.currentTimeMillis(); + long ts2 = ts1 + 1; + long ts3 = ts1 + 2; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, families); + try { + // Putting data in Region + Put put = null; + KeyValue kv13 = new KeyValue(row1, fam1, qf1, ts3, KeyValue.Type.Put, null); + KeyValue kv12 = new KeyValue(row1, fam1, qf1, ts2, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(row1, fam1, qf1, ts1, KeyValue.Type.Put, null); + + KeyValue kv23 = new KeyValue(row1, fam1, qf2, ts3, KeyValue.Type.Put, null); + KeyValue kv22 = new KeyValue(row1, fam1, qf2, ts2, KeyValue.Type.Put, null); + KeyValue kv21 = new KeyValue(row1, fam1, qf2, ts1, KeyValue.Type.Put, null); + + put = new Put(row1); + put.add(kv13); + put.add(kv12); + put.add(kv11); + put.add(kv23); + put.add(kv22); + put.add(kv21); + region.put(put); + + // Expected + List expected = new ArrayList(); + expected.add(kv13); + expected.add(kv12); + expected.add(kv23); + expected.add(kv22); + + Scan scan = new Scan(row1); + scan.addFamily(fam1); + scan.setMaxVersions(MAX_VERSIONS); + List actual = new ArrayList(); + InternalScanner scanner = region.getScanner(scan); + + boolean hasNext = scanner.next(actual); + assertEquals(false, hasNext); + + // Verify result + for (int i = 0; i < expected.size(); 
i++) { + assertEquals(expected.get(i), actual.get(i)); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testScanner_Wildcard_FromFilesOnly_EnforceVersions() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] qf1 = Bytes.toBytes("qualifier1"); + byte[] qf2 = Bytes.toBytes("qualifier2"); + byte[] fam1 = Bytes.toBytes("fam1"); + + long ts1 = 1; // System.currentTimeMillis(); + long ts2 = ts1 + 1; + long ts3 = ts1 + 2; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + // Putting data in Region + Put put = null; + KeyValue kv13 = new KeyValue(row1, fam1, qf1, ts3, KeyValue.Type.Put, null); + KeyValue kv12 = new KeyValue(row1, fam1, qf1, ts2, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(row1, fam1, qf1, ts1, KeyValue.Type.Put, null); + + KeyValue kv23 = new KeyValue(row1, fam1, qf2, ts3, KeyValue.Type.Put, null); + KeyValue kv22 = new KeyValue(row1, fam1, qf2, ts2, KeyValue.Type.Put, null); + KeyValue kv21 = new KeyValue(row1, fam1, qf2, ts1, KeyValue.Type.Put, null); + + put = new Put(row1); + put.add(kv13); + put.add(kv12); + put.add(kv11); + put.add(kv23); + put.add(kv22); + put.add(kv21); + region.put(put); + region.flush(true); + + // Expected + List expected = new ArrayList(); + expected.add(kv13); + expected.add(kv12); + expected.add(kv23); + expected.add(kv22); + + Scan scan = new Scan(row1); + scan.addFamily(fam1); + scan.setMaxVersions(MAX_VERSIONS); + List actual = new ArrayList(); + InternalScanner scanner = region.getScanner(scan); + + boolean hasNext = scanner.next(actual); + assertEquals(false, hasNext); + + // Verify result + for (int i = 0; i < expected.size(); i++) { + assertTrue(CellUtil.equalsIgnoreMvccVersion(expected.get(i), actual.get(i))); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testScanner_StopRow1542() throws IOException { + byte[] family = Bytes.toBytes("testFamily"); + this.region = initHRegion(tableName, getName(), CONF, family); + try { + byte[] row1 = Bytes.toBytes("row111"); + byte[] row2 = Bytes.toBytes("row222"); + byte[] row3 = Bytes.toBytes("row333"); + byte[] row4 = Bytes.toBytes("row444"); + byte[] row5 = Bytes.toBytes("row555"); + + byte[] col1 = Bytes.toBytes("Pub111"); + byte[] col2 = Bytes.toBytes("Pub222"); + + Put put = new Put(row1); + put.add(family, col1, Bytes.toBytes(10L)); + region.put(put); + + put = new Put(row2); + put.add(family, col1, Bytes.toBytes(15L)); + region.put(put); + + put = new Put(row3); + put.add(family, col2, Bytes.toBytes(20L)); + region.put(put); + + put = new Put(row4); + put.add(family, col2, Bytes.toBytes(30L)); + region.put(put); + + put = new Put(row5); + put.add(family, col1, Bytes.toBytes(40L)); + region.put(put); + + Scan scan = new Scan(row3, row4); + scan.setMaxVersions(); + scan.addColumn(family, col1); + InternalScanner s = region.getScanner(scan); + + List results = new ArrayList(); + assertEquals(false, s.next(results)); + assertEquals(0, results.size()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testScanner_Wildcard_FromMemStoreAndFiles_EnforceVersions() throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] qf1 = Bytes.toBytes("qualifier1"); + byte[] qf2 = Bytes.toBytes("quateslifier2"); + + long ts1 = 1; + long 
ts2 = ts1 + 1; + long ts3 = ts1 + 2; + long ts4 = ts1 + 3; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, CONF, fam1); + try { + // Putting data in Region + KeyValue kv14 = new KeyValue(row1, fam1, qf1, ts4, KeyValue.Type.Put, null); + KeyValue kv13 = new KeyValue(row1, fam1, qf1, ts3, KeyValue.Type.Put, null); + KeyValue kv12 = new KeyValue(row1, fam1, qf1, ts2, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(row1, fam1, qf1, ts1, KeyValue.Type.Put, null); + + KeyValue kv24 = new KeyValue(row1, fam1, qf2, ts4, KeyValue.Type.Put, null); + KeyValue kv23 = new KeyValue(row1, fam1, qf2, ts3, KeyValue.Type.Put, null); + KeyValue kv22 = new KeyValue(row1, fam1, qf2, ts2, KeyValue.Type.Put, null); + KeyValue kv21 = new KeyValue(row1, fam1, qf2, ts1, KeyValue.Type.Put, null); + + Put put = null; + put = new Put(row1); + put.add(kv14); + put.add(kv24); + region.put(put); + region.flush(true); + + put = new Put(row1); + put.add(kv23); + put.add(kv13); + region.put(put); + region.flush(true); + + put = new Put(row1); + put.add(kv22); + put.add(kv12); + region.put(put); + region.flush(true); + + put = new Put(row1); + put.add(kv21); + put.add(kv11); + region.put(put); + + // Expected + List expected = new ArrayList(); + expected.add(kv14); + expected.add(kv13); + expected.add(kv12); + expected.add(kv24); + expected.add(kv23); + expected.add(kv22); + + Scan scan = new Scan(row1); + int versions = 3; + scan.setMaxVersions(versions); + List actual = new ArrayList(); + InternalScanner scanner = region.getScanner(scan); + + boolean hasNext = scanner.next(actual); + assertEquals(false, hasNext); + + // Verify result + for (int i = 0; i < expected.size(); i++) { + assertTrue(CellUtil.equalsIgnoreMvccVersion(expected.get(i), actual.get(i))); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + /** + * Added for HBASE-5416 + * + * Here we test scan optimization when only subset of CFs are used in filter + * conditions. + */ + @Test + public void testScanner_JoinedScanners() throws IOException { + byte[] cf_essential = Bytes.toBytes("essential"); + byte[] cf_joined = Bytes.toBytes("joined"); + byte[] cf_alpha = Bytes.toBytes("alpha"); + this.region = initHRegion(tableName, getName(), CONF, cf_essential, cf_joined, cf_alpha); + try { + byte[] row1 = Bytes.toBytes("row1"); + byte[] row2 = Bytes.toBytes("row2"); + byte[] row3 = Bytes.toBytes("row3"); + + byte[] col_normal = Bytes.toBytes("d"); + byte[] col_alpha = Bytes.toBytes("a"); + + byte[] filtered_val = Bytes.toBytes(3); + + Put put = new Put(row1); + put.add(cf_essential, col_normal, Bytes.toBytes(1)); + put.add(cf_joined, col_alpha, Bytes.toBytes(1)); + region.put(put); + + put = new Put(row2); + put.add(cf_essential, col_alpha, Bytes.toBytes(2)); + put.add(cf_joined, col_normal, Bytes.toBytes(2)); + put.add(cf_alpha, col_alpha, Bytes.toBytes(2)); + region.put(put); + + put = new Put(row3); + put.add(cf_essential, col_normal, filtered_val); + put.add(cf_joined, col_normal, filtered_val); + region.put(put); + + // Check two things: + // 1. result list contains expected values + // 2. 
result list is sorted properly + + Scan scan = new Scan(); + Filter filter = new SingleColumnValueExcludeFilter(cf_essential, col_normal, + CompareFilter.CompareOp.NOT_EQUAL, filtered_val); + scan.setFilter(filter); + scan.setLoadColumnFamiliesOnDemand(true); + InternalScanner s = region.getScanner(scan); + + List results = new ArrayList(); + assertTrue(s.next(results)); + assertEquals(results.size(), 1); + results.clear(); + + assertTrue(s.next(results)); + assertEquals(results.size(), 3); + assertTrue("orderCheck", CellUtil.matchingFamily(results.get(0), cf_alpha)); + assertTrue("orderCheck", CellUtil.matchingFamily(results.get(1), cf_essential)); + assertTrue("orderCheck", CellUtil.matchingFamily(results.get(2), cf_joined)); + results.clear(); + + assertFalse(s.next(results)); + assertEquals(results.size(), 0); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + /** + * HBASE-5416 + * + * Test case when scan limits amount of KVs returned on each next() call. + */ + @Test + public void testScanner_JoinedScannersWithLimits() throws IOException { + final byte[] cf_first = Bytes.toBytes("first"); + final byte[] cf_second = Bytes.toBytes("second"); + + this.region = initHRegion(tableName, getName(), CONF, cf_first, cf_second); + try { + final byte[] col_a = Bytes.toBytes("a"); + final byte[] col_b = Bytes.toBytes("b"); + + Put put; + + for (int i = 0; i < 10; i++) { + put = new Put(Bytes.toBytes("r" + Integer.toString(i))); + put.add(cf_first, col_a, Bytes.toBytes(i)); + if (i < 5) { + put.add(cf_first, col_b, Bytes.toBytes(i)); + put.add(cf_second, col_a, Bytes.toBytes(i)); + put.add(cf_second, col_b, Bytes.toBytes(i)); + } + region.put(put); + } + + Scan scan = new Scan(); + scan.setLoadColumnFamiliesOnDemand(true); + Filter bogusFilter = new FilterBase() { + @Override + public ReturnCode filterKeyValue(Cell ignored) throws IOException { + return ReturnCode.INCLUDE; + } + @Override + public boolean isFamilyEssential(byte[] name) { + return Bytes.equals(name, cf_first); + } + }; + + scan.setFilter(bogusFilter); + InternalScanner s = region.getScanner(scan); + + // Our data looks like this: + // r0: first:a, first:b, second:a, second:b + // r1: first:a, first:b, second:a, second:b + // r2: first:a, first:b, second:a, second:b + // r3: first:a, first:b, second:a, second:b + // r4: first:a, first:b, second:a, second:b + // r5: first:a + // r6: first:a + // r7: first:a + // r8: first:a + // r9: first:a + + // But due to next's limit set to 3, we should get this: + // r0: first:a, first:b, second:a + // r0: second:b + // r1: first:a, first:b, second:a + // r1: second:b + // r2: first:a, first:b, second:a + // r2: second:b + // r3: first:a, first:b, second:a + // r3: second:b + // r4: first:a, first:b, second:a + // r4: second:b + // r5: first:a + // r6: first:a + // r7: first:a + // r8: first:a + // r9: first:a + + List results = new ArrayList(); + int index = 0; + ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(3).build(); + while (true) { + boolean more = s.next(results, scannerContext); + if ((index >> 1) < 5) { + if (index % 2 == 0) + assertEquals(results.size(), 3); + else + assertEquals(results.size(), 1); + } else + assertEquals(results.size(), 1); + results.clear(); + index++; + if (!more) + break; + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + /** + * Write an HFile block full with Cells whose qualifier that are identical between + * 0 and 
Short.MAX_VALUE. See HBASE-13329. + * @throws Exception + */ + @Test + public void testLongQualifier() throws Exception { + String method = name.getMethodName(); + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, method, CONF, family); + byte[] q = new byte[Short.MAX_VALUE+2]; + Arrays.fill(q, 0, q.length - 1, (byte) 42); + for (byte i=0; i<10; i++) { + Put p = new Put(Bytes.toBytes("row")); + // qualifiers that differ past Short.MAX_VALUE + q[q.length-1]=i; + p.addColumn(family, q, q); + region.put(p); + } + region.flush(false); + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + // //////////////////////////////////////////////////////////////////////////// + // Split test + // //////////////////////////////////////////////////////////////////////////// + /** + * Splits twice and verifies getting from each of the split regions. + * + * @throws Exception + */ + @Test + public void testBasicSplit() throws Exception { + byte[][] families = { fam1, fam2, fam3 }; + + Configuration hc = initSplit(); + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, hc, families); + + try { + LOG.info("" + HBaseTestCase.addContent(region, fam3)); + region.flush(true); + region.compactStores(); + byte[] splitRow = region.checkSplit(); + assertNotNull(splitRow); + LOG.info("SplitRow: " + Bytes.toString(splitRow)); + HRegion[] regions = splitRegion(region, splitRow); + try { + // Need to open the regions. + // TODO: Add an 'open' to HRegion... don't do open by constructing + // instance. + for (int i = 0; i < regions.length; i++) { + regions[i] = HRegion.openHRegion(regions[i], null); + } + // Assert can get rows out of new regions. Should be able to get first + // row from first region and the midkey from second region. + assertGet(regions[0], fam3, Bytes.toBytes(START_KEY)); + assertGet(regions[1], fam3, splitRow); + // Test I can get scanner and that it starts at right place. + assertScan(regions[0], fam3, Bytes.toBytes(START_KEY)); + assertScan(regions[1], fam3, splitRow); + // Now prove can't split regions that have references. + for (int i = 0; i < regions.length; i++) { + // Add so much data to this region, we create a store file that is > + // than one of our unsplitable references. it will. + for (int j = 0; j < 2; j++) { + HBaseTestCase.addContent(regions[i], fam3); + } + HBaseTestCase.addContent(regions[i], fam2); + HBaseTestCase.addContent(regions[i], fam1); + regions[i].flush(true); + } + + byte[][] midkeys = new byte[regions.length][]; + // To make regions splitable force compaction. + for (int i = 0; i < regions.length; i++) { + regions[i].compactStores(); + midkeys[i] = regions[i].checkSplit(); + } + + TreeMap sortedMap = new TreeMap(); + // Split these two daughter regions so then I'll have 4 regions. Will + // split because added data above. + for (int i = 0; i < regions.length; i++) { + HRegion[] rs = null; + if (midkeys[i] != null) { + rs = splitRegion(regions[i], midkeys[i]); + for (int j = 0; j < rs.length; j++) { + sortedMap.put(Bytes.toString(rs[j].getRegionInfo().getRegionName()), + HRegion.openHRegion(rs[j], null)); + } + } + } + LOG.info("Made 4 regions"); + // The splits should have been even. Test I can get some arbitrary row + // out of each. 
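+ // Step the probe row through the FIRST_CHAR..LAST_CHAR key space in even intervals so that each of the
+ // four daughter regions should be asked for a row that falls inside its own boundaries.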
+ int interval = (LAST_CHAR - FIRST_CHAR) / 3; + byte[] b = Bytes.toBytes(START_KEY); + for (HRegion r : sortedMap.values()) { + assertGet(r, fam3, b); + b[0] += interval; + } + } finally { + for (int i = 0; i < regions.length; i++) { + try { + regions[i].close(); + } catch (IOException e) { + // Ignore. + } + } + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testSplitRegion() throws IOException { + byte[] qualifier = Bytes.toBytes("qualifier"); + Configuration hc = initSplit(); + int numRows = 10; + byte[][] families = { fam1, fam3 }; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, hc, families); + + // Put data in region + int startRow = 100; + putData(startRow, numRows, qualifier, families); + int splitRow = startRow + numRows; + putData(splitRow, numRows, qualifier, families); + region.flush(true); + + HRegion[] regions = null; + try { + regions = splitRegion(region, Bytes.toBytes("" + splitRow)); + // Opening the regions returned. + for (int i = 0; i < regions.length; i++) { + regions[i] = HRegion.openHRegion(regions[i], null); + } + // Verifying that the region has been split + assertEquals(2, regions.length); + + // Verifying that all data is still there and that data is in the right + // place + verifyData(regions[0], startRow, numRows, qualifier, families); + verifyData(regions[1], splitRow, numRows, qualifier, families); + + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testClearForceSplit() throws IOException { + byte[] qualifier = Bytes.toBytes("qualifier"); + Configuration hc = initSplit(); + int numRows = 10; + byte[][] families = { fam1, fam3 }; + + // Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, hc, families); + + // Put data in region + int startRow = 100; + putData(startRow, numRows, qualifier, families); + int splitRow = startRow + numRows; + byte[] splitRowBytes = Bytes.toBytes("" + splitRow); + putData(splitRow, numRows, qualifier, families); + region.flush(true); + + HRegion[] regions = null; + try { + // Set force split + region.forceSplit(splitRowBytes); + assertTrue(region.shouldForceSplit()); + // Split point should be the force split row + assertTrue(Bytes.equals(splitRowBytes, region.checkSplit())); + + // Add a store that has references. + HStore storeMock = Mockito.mock(HStore.class); + when(storeMock.hasReferences()).thenReturn(true); + when(storeMock.getFamily()).thenReturn(new HColumnDescriptor("cf")); + when(storeMock.close()).thenReturn(ImmutableList.of()); + when(storeMock.getColumnFamilyName()).thenReturn("cf"); + region.stores.put(Bytes.toBytes(storeMock.getColumnFamilyName()), storeMock); + assertTrue(region.hasReferences()); + + // Will not split since the store has references. + regions = splitRegion(region, splitRowBytes); + assertNull(regions); + + // Region force split should be cleared after the split try. + assertFalse(region.shouldForceSplit()); + + // Remove the store that has references. + region.stores.remove(Bytes.toBytes(storeMock.getColumnFamilyName())); + assertFalse(region.hasReferences()); + + // Now we can split. + regions = splitRegion(region, splitRowBytes); + + // Opening the regions returned. 
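+ // splitRegion() returns daughter regions that are not yet open, so open them before verifying the split
+ // and reading the data back.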
+ for (int i = 0; i < regions.length; i++) { + regions[i] = HRegion.openHRegion(regions[i], null); + } + // Verifying that the region has been split + assertEquals(2, regions.length); + + // Verifying that all data is still there and that data is in the right + // place + verifyData(regions[0], startRow, numRows, qualifier, families); + verifyData(regions[1], splitRow, numRows, qualifier, families); + + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + /** + * Flushes the cache in a thread while scanning. The tests verify that the + * scan is coherent - e.g. the returned results are always of the same or + * later update as the previous results. + * + * @throws IOException + * scan / compact + * @throws InterruptedException + * thread join + */ + @Test + public void testFlushCacheWhileScanning() throws IOException, InterruptedException { + byte[] family = Bytes.toBytes("family"); + int numRows = 1000; + int flushAndScanInterval = 10; + int compactInterval = 10 * flushAndScanInterval; + + String method = "testFlushCacheWhileScanning"; + this.region = initHRegion(tableName, method, CONF, family); + try { + FlushThread flushThread = new FlushThread(); + flushThread.start(); + + Scan scan = new Scan(); + scan.addFamily(family); + scan.setFilter(new SingleColumnValueFilter(family, qual1, CompareFilter.CompareOp.EQUAL, + new BinaryComparator(Bytes.toBytes(5L)))); + + int expectedCount = 0; + List res = new ArrayList(); + + boolean toggle = true; + for (long i = 0; i < numRows; i++) { + Put put = new Put(Bytes.toBytes(i)); + put.setDurability(Durability.SKIP_WAL); + put.add(family, qual1, Bytes.toBytes(i % 10)); + region.put(put); + + if (i != 0 && i % compactInterval == 0) { + // System.out.println("iteration = " + i); + region.compact(true); + } + + if (i % 10 == 5L) { + expectedCount++; + } + + if (i != 0 && i % flushAndScanInterval == 0) { + res.clear(); + InternalScanner scanner = region.getScanner(scan); + if (toggle) { + flushThread.flush(); + } + while (scanner.next(res)) + ; + if (!toggle) { + flushThread.flush(); + } + assertEquals("i=" + i, expectedCount, res.size()); + toggle = !toggle; + } + } + + flushThread.done(); + flushThread.join(); + flushThread.checkNoError(); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + protected class FlushThread extends Thread { + private volatile boolean done; + private Throwable error = null; + + public void done() { + done = true; + synchronized (this) { + interrupt(); + } + } + + public void checkNoError() { + if (error != null) { + assertNull(error); + } + } + + @Override + public void run() { + done = false; + while (!done) { + synchronized (this) { + try { + wait(); + } catch (InterruptedException ignored) { + if (done) { + break; + } + } + } + try { + region.flush(true,true); + } catch (IOException e) { + if (!done) { + LOG.error("Error while flusing cache", e); + error = e; + } + break; + } + } + + } + + public void flush() { + synchronized (this) { + notify(); + } + + } + } + + /** + * Writes very wide records and scans for the latest every time.. Flushes and + * compacts the region every now and then to keep things realistic. 
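+ * Coherence is checked by asserting that the newest timestamp seen by each scan is never older than the
+ * newest timestamp seen by the previous scan.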
+ * + * @throws IOException + * by flush / scan / compaction + * @throws InterruptedException + * when joining threads + */ + @Test + public void testWritesWhileScanning() throws IOException, InterruptedException { + int testCount = 100; + int numRows = 1; + int numFamilies = 10; + int numQualifiers = 100; + int flushInterval = 7; + int compactInterval = 5 * flushInterval; + byte[][] families = new byte[numFamilies][]; + for (int i = 0; i < numFamilies; i++) { + families[i] = Bytes.toBytes("family" + i); + } + byte[][] qualifiers = new byte[numQualifiers][]; + for (int i = 0; i < numQualifiers; i++) { + qualifiers[i] = Bytes.toBytes("qual" + i); + } + + String method = "testWritesWhileScanning"; + this.region = initHRegion(tableName, method, CONF, families); + try { + PutThread putThread = new PutThread(numRows, families, qualifiers); + putThread.start(); + putThread.waitForFirstPut(); + + FlushThread flushThread = new FlushThread(); + flushThread.start(); + + Scan scan = new Scan(Bytes.toBytes("row0"), Bytes.toBytes("row1")); + + int expectedCount = numFamilies * numQualifiers; + List res = new ArrayList(); + + long prevTimestamp = 0L; + for (int i = 0; i < testCount; i++) { + + if (i != 0 && i % compactInterval == 0) { + region.compact(true); + } + + if (i != 0 && i % flushInterval == 0) { + flushThread.flush(); + } + + boolean previousEmpty = res.isEmpty(); + res.clear(); + InternalScanner scanner = region.getScanner(scan); + while (scanner.next(res)) + ; + if (!res.isEmpty() || !previousEmpty || i > compactInterval) { + assertEquals("i=" + i, expectedCount, res.size()); + long timestamp = res.get(0).getTimestamp(); + assertTrue("Timestamps were broke: " + timestamp + " prev: " + prevTimestamp, + timestamp >= prevTimestamp); + prevTimestamp = timestamp; + } + } + + putThread.done(); + + region.flush(true,true); + + putThread.join(); + putThread.checkNoError(); + + flushThread.done(); + flushThread.join(); + flushThread.checkNoError(); + } finally { + try { + HBaseTestingUtility.closeRegionAndWAL(this.region); + } catch (DroppedSnapshotException dse) { + // We could get this on way out because we interrupt the background flusher and it could + // fail anywhere causing a DSE over in the background flusher... only it is not properly + // dealt with so could still be memory hanging out when we get to here -- memory we can't + // flush because the accounting is 'off' since original DSE. + } + this.region = null; + } + } + + protected class PutThread extends Thread { + private volatile boolean done; + private volatile int numPutsFinished = 0; + + private Throwable error = null; + private int numRows; + private byte[][] families; + private byte[][] qualifiers; + + private PutThread(int numRows, byte[][] families, byte[][] qualifiers) { + this.numRows = numRows; + this.families = families; + this.qualifiers = qualifiers; + } + + /** + * Block calling thread until this instance of PutThread has put at least one row. 
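+ * Implemented as a 50 ms polling loop on numPutsFinished that also surfaces any error raised by the put thread.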
+ */ + public void waitForFirstPut() throws InterruptedException { + // wait until put thread actually puts some data + while (isAlive() && numPutsFinished == 0) { + checkNoError(); + Thread.sleep(50); + } + } + + public void done() { + done = true; + synchronized (this) { + interrupt(); + } + } + + public void checkNoError() { + if (error != null) { + assertNull(error); + } + } + + @Override + public void run() { + done = false; + while (!done) { + try { + for (int r = 0; r < numRows; r++) { + byte[] row = Bytes.toBytes("row" + r); + Put put = new Put(row); + put.setDurability(Durability.SKIP_WAL); + byte[] value = Bytes.toBytes(String.valueOf(numPutsFinished)); + for (byte[] family : families) { + for (byte[] qualifier : qualifiers) { + put.add(family, qualifier, (long) numPutsFinished, value); + } + } + region.put(put); + numPutsFinished++; + if (numPutsFinished > 0 && numPutsFinished % 47 == 0) { + System.out.println("put iteration = " + numPutsFinished); + Delete delete = new Delete(row, (long) numPutsFinished - 30); + region.delete(delete); + } + numPutsFinished++; + } + } catch (InterruptedIOException e) { + // This is fine. It means we are done, or didn't get the lock on time + } catch (IOException e) { + LOG.error("error while putting records", e); + error = e; + break; + } + } + + } + + } + + /** + * Writes very wide records and gets the latest row every time.. Flushes and + * compacts the region aggressivly to catch issues. + * + * @throws IOException + * by flush / scan / compaction + * @throws InterruptedException + * when joining threads + */ + @Test + public void testWritesWhileGetting() throws Exception { + int testCount = 50; + int numRows = 1; + int numFamilies = 10; + int numQualifiers = 100; + int compactInterval = 100; + byte[][] families = new byte[numFamilies][]; + for (int i = 0; i < numFamilies; i++) { + families[i] = Bytes.toBytes("family" + i); + } + byte[][] qualifiers = new byte[numQualifiers][]; + for (int i = 0; i < numQualifiers; i++) { + qualifiers[i] = Bytes.toBytes("qual" + i); + } + + + String method = "testWritesWhileGetting"; + // This test flushes constantly and can cause many files to be created, + // possibly + // extending over the ulimit. Make sure compactions are aggressive in + // reducing + // the number of HFiles created. + Configuration conf = HBaseConfiguration.create(CONF); + conf.setInt("hbase.hstore.compaction.min", 1); + conf.setInt("hbase.hstore.compaction.max", 1000); + this.region = initHRegion(tableName, method, conf, families); + PutThread putThread = null; + MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(conf); + try { + putThread = new PutThread(numRows, families, qualifiers); + putThread.start(); + putThread.waitForFirstPut(); + + // Add a thread that flushes as fast as possible + ctx.addThread(new MultithreadedTestUtil.RepeatingTestThread(ctx) { + private int flushesSinceCompact = 0; + private final int maxFlushesSinceCompact = 20; + + @Override + public void doAnAction() throws Exception { + if (region.flush(true).isCompactionNeeded()) { + ++flushesSinceCompact; + } + // Compact regularly to avoid creating too many files and exceeding + // the ulimit. 
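+ // Only flushes that report isCompactionNeeded() are counted; once maxFlushesSinceCompact such flushes
+ // have accumulated, force a compaction and reset the counter.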
+ if (flushesSinceCompact == maxFlushesSinceCompact) { + region.compact(false); + flushesSinceCompact = 0; + } + } + }); + ctx.startThreads(); + + Get get = new Get(Bytes.toBytes("row0")); + Result result = null; + + int expectedCount = numFamilies * numQualifiers; + + long prevTimestamp = 0L; + for (int i = 0; i < testCount; i++) { + LOG.info("testWritesWhileGetting verify turn " + i); + boolean previousEmpty = result == null || result.isEmpty(); + result = region.get(get); + if (!result.isEmpty() || !previousEmpty || i > compactInterval) { + assertEquals("i=" + i, expectedCount, result.size()); + // TODO this was removed, now what dangit?! + // search looking for the qualifier in question? + long timestamp = 0; + for (Cell kv : result.rawCells()) { + if (CellUtil.matchingFamily(kv, families[0]) + && CellUtil.matchingQualifier(kv, qualifiers[0])) { + timestamp = kv.getTimestamp(); + } + } + assertTrue(timestamp >= prevTimestamp); + prevTimestamp = timestamp; + Cell previousKV = null; + + for (Cell kv : result.rawCells()) { + byte[] thisValue = CellUtil.cloneValue(kv); + if (previousKV != null) { + if (Bytes.compareTo(CellUtil.cloneValue(previousKV), thisValue) != 0) { + LOG.warn("These two KV should have the same value." + " Previous KV:" + previousKV + + "(memStoreTS:" + previousKV.getSequenceId() + ")" + ", New KV: " + kv + + "(memStoreTS:" + kv.getSequenceId() + ")"); + assertEquals(0, Bytes.compareTo(CellUtil.cloneValue(previousKV), thisValue)); + } + } + previousKV = kv; + } + } + } + } finally { + if (putThread != null) + putThread.done(); + + region.flush(true); + + if (putThread != null) { + putThread.join(); + putThread.checkNoError(); + } + + ctx.stop(); + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testHolesInMeta() throws Exception { + byte[] family = Bytes.toBytes("family"); + this.region = initHRegion(tableName, Bytes.toBytes("x"), Bytes.toBytes("z"), method, CONF, + false, family); + try { + byte[] rowNotServed = Bytes.toBytes("a"); + Get g = new Get(rowNotServed); + try { + region.get(g); + fail(); + } catch (WrongRegionException x) { + // OK + } + byte[] row = Bytes.toBytes("y"); + g = new Get(row); + region.get(g); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testIndexesScanWithOneDeletedRow() throws IOException { + byte[] family = Bytes.toBytes("family"); + + // Setting up region + String method = "testIndexesScanWithOneDeletedRow"; + this.region = initHRegion(tableName, method, CONF, family); + try { + Put put = new Put(Bytes.toBytes(1L)); + put.add(family, qual1, 1L, Bytes.toBytes(1L)); + region.put(put); + + region.flush(true); + + Delete delete = new Delete(Bytes.toBytes(1L), 1L); + region.delete(delete); + + put = new Put(Bytes.toBytes(2L)); + put.add(family, qual1, 2L, Bytes.toBytes(2L)); + region.put(put); + + Scan idxScan = new Scan(); + idxScan.addFamily(family); + idxScan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays. 
asList( + new SingleColumnValueFilter(family, qual1, CompareFilter.CompareOp.GREATER_OR_EQUAL, + new BinaryComparator(Bytes.toBytes(0L))), new SingleColumnValueFilter(family, qual1, + CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes(3L)))))); + InternalScanner scanner = region.getScanner(idxScan); + List res = new ArrayList(); + + while (scanner.next(res)) + ; + assertEquals(1L, res.size()); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + // //////////////////////////////////////////////////////////////////////////// + // Bloom filter test + // //////////////////////////////////////////////////////////////////////////// + @Test + public void testBloomFilterSize() throws IOException { + byte[] fam1 = Bytes.toBytes("fam1"); + byte[] qf1 = Bytes.toBytes("col"); + byte[] val1 = Bytes.toBytes("value1"); + // Create Table + HColumnDescriptor hcd = new HColumnDescriptor(fam1).setMaxVersions(Integer.MAX_VALUE) + .setBloomFilterType(BloomType.ROWCOL); + + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.addFamily(hcd); + HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false); + this.region = TEST_UTIL.createLocalHRegion(info, htd); + try { + int num_unique_rows = 10; + int duplicate_multiplier = 2; + int num_storefiles = 4; + + int version = 0; + for (int f = 0; f < num_storefiles; f++) { + for (int i = 0; i < duplicate_multiplier; i++) { + for (int j = 0; j < num_unique_rows; j++) { + Put put = new Put(Bytes.toBytes("row" + j)); + put.setDurability(Durability.SKIP_WAL); + put.add(fam1, qf1, version++, val1); + region.put(put); + } + } + region.flush(true); + } + // before compaction + HStore store = (HStore) region.getStore(fam1); + Collection storeFiles = store.getStorefiles(); + for (StoreFile storefile : storeFiles) { + StoreFile.Reader reader = storefile.getReader(); + reader.loadFileInfo(); + reader.loadBloomfilter(); + assertEquals(num_unique_rows * duplicate_multiplier, reader.getEntries()); + assertEquals(num_unique_rows, reader.getFilterEntries()); + } + + region.compact(true); + + // after compaction + storeFiles = store.getStorefiles(); + for (StoreFile storefile : storeFiles) { + StoreFile.Reader reader = storefile.getReader(); + reader.loadFileInfo(); + reader.loadBloomfilter(); + assertEquals(num_unique_rows * duplicate_multiplier * num_storefiles, reader.getEntries()); + assertEquals(num_unique_rows, reader.getFilterEntries()); + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testAllColumnsWithBloomFilter() throws IOException { + byte[] TABLE = Bytes.toBytes("testAllColumnsWithBloomFilter"); + byte[] FAMILY = Bytes.toBytes("family"); + + // Create table + HColumnDescriptor hcd = new HColumnDescriptor(FAMILY).setMaxVersions(Integer.MAX_VALUE) + .setBloomFilterType(BloomType.ROWCOL); + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE)); + htd.addFamily(hcd); + HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false); + this.region = TEST_UTIL.createLocalHRegion(info, htd); + try { + // For row:0, col:0: insert versions 1 through 5. 
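+ // (Despite the wording above, the loop below writes versions 1 through 4 only, and the assertions further
+ // down expect exactly four cells, returned newest first.)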
+ byte row[] = Bytes.toBytes("row:" + 0); + byte column[] = Bytes.toBytes("column:" + 0); + Put put = new Put(row); + put.setDurability(Durability.SKIP_WAL); + for (long idx = 1; idx <= 4; idx++) { + put.add(FAMILY, column, idx, Bytes.toBytes("value-version-" + idx)); + } + region.put(put); + + // Flush + region.flush(true); + + // Get rows + Get get = new Get(row); + get.setMaxVersions(); + Cell[] kvs = region.get(get).rawCells(); + + // Check if rows are correct + assertEquals(4, kvs.length); + checkOneCell(kvs[0], FAMILY, 0, 0, 4); + checkOneCell(kvs[1], FAMILY, 0, 0, 3); + checkOneCell(kvs[2], FAMILY, 0, 0, 2); + checkOneCell(kvs[3], FAMILY, 0, 0, 1); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + /** + * Testcase to cover bug-fix for HBASE-2823 Ensures correct delete when + * issuing delete row on columns with bloom filter set to row+col + * (BloomType.ROWCOL) + */ + @Test + public void testDeleteRowWithBloomFilter() throws IOException { + byte[] familyName = Bytes.toBytes("familyName"); + + // Create Table + HColumnDescriptor hcd = new HColumnDescriptor(familyName).setMaxVersions(Integer.MAX_VALUE) + .setBloomFilterType(BloomType.ROWCOL); + + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.addFamily(hcd); + HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false); + this.region = TEST_UTIL.createLocalHRegion(info, htd); + try { + // Insert some data + byte row[] = Bytes.toBytes("row1"); + byte col[] = Bytes.toBytes("col1"); + + Put put = new Put(row); + put.add(familyName, col, 1, Bytes.toBytes("SomeRandomValue")); + region.put(put); + region.flush(true); + + Delete del = new Delete(row); + region.delete(del); + region.flush(true); + + // Get remaining rows (should have none) + Get get = new Get(row); + get.addColumn(familyName, col); + + Cell[] keyValues = region.get(get).rawCells(); + assertTrue(keyValues.length == 0); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test + public void testgetHDFSBlocksDistribution() throws Exception { + HBaseTestingUtility htu = new HBaseTestingUtility(); + // Why do we set the block size in this test? If we set it smaller than the kvs, then we'll + // break up the file in to more pieces that can be distributed across the three nodes and we + // won't be able to have the condition this test asserts; that at least one node has + // a copy of all replicas -- if small block size, then blocks are spread evenly across the + // the three nodes. hfilev3 with tags seems to put us over the block size. St.Ack. 
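+ // dfs.replication is pinned to 2 below so the pigeonhole argument used by the assertions holds regardless
+ // of the cluster's default replication factor.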
+ // final int DEFAULT_BLOCK_SIZE = 1024; + // htu.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE); + htu.getConfiguration().setInt("dfs.replication", 2); + + // set up a cluster with 3 nodes + MiniHBaseCluster cluster = null; + String dataNodeHosts[] = new String[] { "host1", "host2", "host3" }; + int regionServersCount = 3; + + try { + cluster = htu.startMiniCluster(1, regionServersCount, dataNodeHosts); + byte[][] families = { fam1, fam2 }; + Table ht = htu.createTable(TableName.valueOf(this.getName()), families); + + // Setting up region + byte row[] = Bytes.toBytes("row1"); + byte col[] = Bytes.toBytes("col1"); + + Put put = new Put(row); + put.add(fam1, col, 1, Bytes.toBytes("test1")); + put.add(fam2, col, 1, Bytes.toBytes("test2")); + ht.put(put); + + HRegion firstRegion = htu.getHBaseCluster().getRegions(TableName.valueOf(this.getName())) + .get(0); + firstRegion.flush(true); + HDFSBlocksDistribution blocksDistribution1 = firstRegion.getHDFSBlocksDistribution(); + + // Given the default replication factor is 2 and we have 2 HFiles, + // we will have total of 4 replica of blocks on 3 datanodes; thus there + // must be at least one host that have replica for 2 HFiles. That host's + // weight will be equal to the unique block weight. + long uniqueBlocksWeight1 = blocksDistribution1.getUniqueBlocksTotalWeight(); + StringBuilder sb = new StringBuilder(); + for (String host: blocksDistribution1.getTopHosts()) { + if (sb.length() > 0) sb.append(", "); + sb.append(host); + sb.append("="); + sb.append(blocksDistribution1.getWeight(host)); + } + + String topHost = blocksDistribution1.getTopHosts().get(0); + long topHostWeight = blocksDistribution1.getWeight(topHost); + String msg = "uniqueBlocksWeight=" + uniqueBlocksWeight1 + ", topHostWeight=" + + topHostWeight + ", topHost=" + topHost + "; " + sb.toString(); + LOG.info(msg); + assertTrue(msg, uniqueBlocksWeight1 == topHostWeight); + + // use the static method to compute the value, it should be the same. + // static method is used by load balancer or other components + HDFSBlocksDistribution blocksDistribution2 = HRegion.computeHDFSBlocksDistribution( + htu.getConfiguration(), firstRegion.getTableDesc(), firstRegion.getRegionInfo()); + long uniqueBlocksWeight2 = blocksDistribution2.getUniqueBlocksTotalWeight(); + + assertTrue(uniqueBlocksWeight1 == uniqueBlocksWeight2); + + ht.close(); + } finally { + if (cluster != null) { + htu.shutdownMiniCluster(); + } + } + } + + /** + * Testcase to check state of region initialization task set to ABORTED or not + * if any exceptions during initialization + * + * @throws Exception + */ + @Test + public void testStatusSettingToAbortIfAnyExceptionDuringRegionInitilization() throws Exception { + TableName tableName = TableName.valueOf(name.getMethodName()); + HRegionInfo info = null; + try { + FileSystem fs = Mockito.mock(FileSystem.class); + Mockito.when(fs.exists((Path) Mockito.anyObject())).thenThrow(new IOException()); + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.addFamily(new HColumnDescriptor("cf")); + info = new HRegionInfo(htd.getTableName(), HConstants.EMPTY_BYTE_ARRAY, + HConstants.EMPTY_BYTE_ARRAY, false); + Path path = new Path(dir + "testStatusSettingToAbortIfAnyExceptionDuringRegionInitilization"); + region = HRegion.newHRegion(path, null, fs, CONF, info, htd, null); + // region initialization throws IOException and set task state to ABORTED. 
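+ // The mocked FileSystem throws IOException from exists(), so initialize() is expected to fail and the
+ // catch block can then inspect the MonitoredTask state.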
+ region.initialize(); + fail("Region initialization should fail due to IOException"); + } catch (IOException io) { + List tasks = TaskMonitor.get().getTasks(); + for (MonitoredTask monitoredTask : tasks) { + if (!(monitoredTask instanceof MonitoredRPCHandler) + && monitoredTask.getDescription().contains(region.toString())) { + assertTrue("Region state should be ABORTED.", + monitoredTask.getState().equals(MonitoredTask.State.ABORTED)); + break; + } + } + } finally { + HBaseTestingUtility.closeRegionAndWAL(region); + } + } + + /** + * Verifies that the .regioninfo file is written on region creation and that + * is recreated if missing during region opening. + */ + @Test + public void testRegionInfoFileCreation() throws IOException { + Path rootDir = new Path(dir + "testRegionInfoFileCreation"); + + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("testtb")); + htd.addFamily(new HColumnDescriptor("cf")); + + HRegionInfo hri = new HRegionInfo(htd.getTableName()); + + // Create a region and skip the initialization (like CreateTableHandler) + HRegion region = HBaseTestingUtility.createRegionAndWAL(hri, rootDir, CONF, htd, false); + Path regionDir = region.getRegionFileSystem().getRegionDir(); + FileSystem fs = region.getRegionFileSystem().getFileSystem(); + HBaseTestingUtility.closeRegionAndWAL(region); + + Path regionInfoFile = new Path(regionDir, HRegionFileSystem.REGION_INFO_FILE); + + // Verify that the .regioninfo file is present + assertTrue(HRegionFileSystem.REGION_INFO_FILE + " should be present in the region dir", + fs.exists(regionInfoFile)); + + // Try to open the region + region = HRegion.openHRegion(rootDir, hri, htd, null, CONF); + assertEquals(regionDir, region.getRegionFileSystem().getRegionDir()); + HBaseTestingUtility.closeRegionAndWAL(region); + + // Verify that the .regioninfo file is still there + assertTrue(HRegionFileSystem.REGION_INFO_FILE + " should be present in the region dir", + fs.exists(regionInfoFile)); + + // Remove the .regioninfo file and verify is recreated on region open + fs.delete(regionInfoFile, true); + assertFalse(HRegionFileSystem.REGION_INFO_FILE + " should be removed from the region dir", + fs.exists(regionInfoFile)); + + region = HRegion.openHRegion(rootDir, hri, htd, null, CONF); + // region = TEST_UTIL.openHRegion(hri, htd); + assertEquals(regionDir, region.getRegionFileSystem().getRegionDir()); + HBaseTestingUtility.closeRegionAndWAL(region); + + // Verify that the .regioninfo file is still there + assertTrue(HRegionFileSystem.REGION_INFO_FILE + " should be present in the region dir", + fs.exists(new Path(regionDir, HRegionFileSystem.REGION_INFO_FILE))); + } + + /** + * TestCase for increment + */ + private static class Incrementer implements Runnable { + private HRegion region; + private final static byte[] incRow = Bytes.toBytes("incRow"); + private final static byte[] family = Bytes.toBytes("family"); + private final static byte[] qualifier = Bytes.toBytes("qualifier"); + private final static long ONE = 1l; + private int incCounter; + + public Incrementer(HRegion region, int incCounter) { + this.region = region; + this.incCounter = incCounter; + } + + @Override + public void run() { + int count = 0; + while (count < incCounter) { + Increment inc = new Increment(incRow); + inc.addColumn(family, qualifier, ONE); + count++; + try { + region.increment(inc); + } catch (IOException e) { + LOG.info("Count=" + count + ", " + e); + break; + } + } + } + } + + /** + * Test case to check increment function with memstore flushing + * @throws 
Exception + */ + @Test + public void testParallelIncrementWithMemStoreFlush() throws Exception { + byte[] family = Incrementer.family; + this.region = initHRegion(tableName, method, CONF, family); + final HRegion region = this.region; + final AtomicBoolean incrementDone = new AtomicBoolean(false); + Runnable flusher = new Runnable() { + @Override + public void run() { + while (!incrementDone.get()) { + try { + region.flush(true); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + }; + + // after all increment finished, the row will increment to 20*100 = 2000 + int threadNum = 20; + int incCounter = 100; + long expected = threadNum * incCounter; + Thread[] incrementers = new Thread[threadNum]; + Thread flushThread = new Thread(flusher); + for (int i = 0; i < threadNum; i++) { + incrementers[i] = new Thread(new Incrementer(this.region, incCounter)); + incrementers[i].start(); + } + flushThread.start(); + for (int i = 0; i < threadNum; i++) { + incrementers[i].join(); + } + + incrementDone.set(true); + flushThread.join(); + + Get get = new Get(Incrementer.incRow); + get.addColumn(Incrementer.family, Incrementer.qualifier); + get.setMaxVersions(1); + Result res = this.region.get(get); + List kvs = res.getColumnCells(Incrementer.family, Incrementer.qualifier); + + // we just got the latest version + assertEquals(kvs.size(), 1); + Cell kv = kvs.get(0); + assertEquals(expected, Bytes.toLong(kv.getValueArray(), kv.getValueOffset())); + this.region = null; + } + + /** + * TestCase for append + */ + private static class Appender implements Runnable { + private HRegion region; + private final static byte[] appendRow = Bytes.toBytes("appendRow"); + private final static byte[] family = Bytes.toBytes("family"); + private final static byte[] qualifier = Bytes.toBytes("qualifier"); + private final static byte[] CHAR = Bytes.toBytes("a"); + private int appendCounter; + + public Appender(HRegion region, int appendCounter) { + this.region = region; + this.appendCounter = appendCounter; + } + + @Override + public void run() { + int count = 0; + while (count < appendCounter) { + Append app = new Append(appendRow); + app.add(family, qualifier, CHAR); + count++; + try { + region.append(app); + } catch (IOException e) { + LOG.info("Count=" + count + ", max=" + appendCounter + ", " + e); + break; + } + } + } + } + + /** + * Test case to check append function with memstore flushing + * @throws Exception + */ + @Test + public void testParallelAppendWithMemStoreFlush() throws Exception { + byte[] family = Appender.family; + this.region = initHRegion(tableName, method, CONF, family); + final HRegion region = this.region; + final AtomicBoolean appendDone = new AtomicBoolean(false); + Runnable flusher = new Runnable() { + @Override + public void run() { + while (!appendDone.get()) { + try { + region.flush(true); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + }; + + // After all append finished, the value will append to threadNum * + // appendCounter Appender.CHAR + int threadNum = 20; + int appendCounter = 100; + byte[] expected = new byte[threadNum * appendCounter]; + for (int i = 0; i < threadNum * appendCounter; i++) { + System.arraycopy(Appender.CHAR, 0, expected, i, 1); + } + Thread[] appenders = new Thread[threadNum]; + Thread flushThread = new Thread(flusher); + for (int i = 0; i < threadNum; i++) { + appenders[i] = new Thread(new Appender(this.region, appendCounter)); + appenders[i].start(); + } + flushThread.start(); + for (int i = 0; i < threadNum; i++) { + appenders[i].join(); + } 
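+ // All appenders have finished; signal the flushing thread to stop and wait for it before reading the result.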
+ + appendDone.set(true); + flushThread.join(); + + Get get = new Get(Appender.appendRow); + get.addColumn(Appender.family, Appender.qualifier); + get.setMaxVersions(1); + Result res = this.region.get(get); + List kvs = res.getColumnCells(Appender.family, Appender.qualifier); + + // we just got the latest version + assertEquals(kvs.size(), 1); + Cell kv = kvs.get(0); + byte[] appendResult = new byte[kv.getValueLength()]; + System.arraycopy(kv.getValueArray(), kv.getValueOffset(), appendResult, 0, kv.getValueLength()); + assertArrayEquals(expected, appendResult); + this.region = null; + } + + /** + * Test case to check put function with memstore flushing for same row, same ts + * @throws Exception + */ + @Test + public void testPutWithMemStoreFlush() throws Exception { + byte[] family = Bytes.toBytes("family"); + ; + byte[] qualifier = Bytes.toBytes("qualifier"); + byte[] row = Bytes.toBytes("putRow"); + byte[] value = null; + this.region = initHRegion(tableName, method, CONF, family); + Put put = null; + Get get = null; + List kvs = null; + Result res = null; + + put = new Put(row); + value = Bytes.toBytes("value0"); + put.add(family, qualifier, 1234567l, value); + region.put(put); + get = new Get(row); + get.addColumn(family, qualifier); + get.setMaxVersions(); + res = this.region.get(get); + kvs = res.getColumnCells(family, qualifier); + assertEquals(1, kvs.size()); + assertArrayEquals(Bytes.toBytes("value0"), CellUtil.cloneValue(kvs.get(0))); + + region.flush(true); + get = new Get(row); + get.addColumn(family, qualifier); + get.setMaxVersions(); + res = this.region.get(get); + kvs = res.getColumnCells(family, qualifier); + assertEquals(1, kvs.size()); + assertArrayEquals(Bytes.toBytes("value0"), CellUtil.cloneValue(kvs.get(0))); + + put = new Put(row); + value = Bytes.toBytes("value1"); + put.add(family, qualifier, 1234567l, value); + region.put(put); + get = new Get(row); + get.addColumn(family, qualifier); + get.setMaxVersions(); + res = this.region.get(get); + kvs = res.getColumnCells(family, qualifier); + assertEquals(1, kvs.size()); + assertArrayEquals(Bytes.toBytes("value1"), CellUtil.cloneValue(kvs.get(0))); + + region.flush(true); + get = new Get(row); + get.addColumn(family, qualifier); + get.setMaxVersions(); + res = this.region.get(get); + kvs = res.getColumnCells(family, qualifier); + assertEquals(1, kvs.size()); + assertArrayEquals(Bytes.toBytes("value1"), CellUtil.cloneValue(kvs.get(0))); + } + + @Test + public void testDurability() throws Exception { + String method = "testDurability"; + // there are 5 x 5 cases: + // table durability(SYNC,FSYNC,ASYC,SKIP,USE_DEFAULT) x mutation + // durability(SYNC,FSYNC,ASYC,SKIP,USE_DEFAULT) + + // expected cases for append and sync wal + durabilityTest(method, Durability.SYNC_WAL, Durability.SYNC_WAL, 0, true, true, false); + durabilityTest(method, Durability.SYNC_WAL, Durability.FSYNC_WAL, 0, true, true, false); + durabilityTest(method, Durability.SYNC_WAL, Durability.USE_DEFAULT, 0, true, true, false); + + durabilityTest(method, Durability.FSYNC_WAL, Durability.SYNC_WAL, 0, true, true, false); + durabilityTest(method, Durability.FSYNC_WAL, Durability.FSYNC_WAL, 0, true, true, false); + durabilityTest(method, Durability.FSYNC_WAL, Durability.USE_DEFAULT, 0, true, true, false); + + durabilityTest(method, Durability.ASYNC_WAL, Durability.SYNC_WAL, 0, true, true, false); + durabilityTest(method, Durability.ASYNC_WAL, Durability.FSYNC_WAL, 0, true, true, false); + + durabilityTest(method, Durability.SKIP_WAL, Durability.SYNC_WAL, 0, 
true, true, false); + durabilityTest(method, Durability.SKIP_WAL, Durability.FSYNC_WAL, 0, true, true, false); + + durabilityTest(method, Durability.USE_DEFAULT, Durability.SYNC_WAL, 0, true, true, false); + durabilityTest(method, Durability.USE_DEFAULT, Durability.FSYNC_WAL, 0, true, true, false); + durabilityTest(method, Durability.USE_DEFAULT, Durability.USE_DEFAULT, 0, true, true, false); + + // expected cases for async wal + durabilityTest(method, Durability.SYNC_WAL, Durability.ASYNC_WAL, 0, true, false, false); + durabilityTest(method, Durability.FSYNC_WAL, Durability.ASYNC_WAL, 0, true, false, false); + durabilityTest(method, Durability.ASYNC_WAL, Durability.ASYNC_WAL, 0, true, false, false); + durabilityTest(method, Durability.SKIP_WAL, Durability.ASYNC_WAL, 0, true, false, false); + durabilityTest(method, Durability.USE_DEFAULT, Durability.ASYNC_WAL, 0, true, false, false); + durabilityTest(method, Durability.ASYNC_WAL, Durability.USE_DEFAULT, 0, true, false, false); + + durabilityTest(method, Durability.SYNC_WAL, Durability.ASYNC_WAL, 5000, true, false, true); + durabilityTest(method, Durability.FSYNC_WAL, Durability.ASYNC_WAL, 5000, true, false, true); + durabilityTest(method, Durability.ASYNC_WAL, Durability.ASYNC_WAL, 5000, true, false, true); + durabilityTest(method, Durability.SKIP_WAL, Durability.ASYNC_WAL, 5000, true, false, true); + durabilityTest(method, Durability.USE_DEFAULT, Durability.ASYNC_WAL, 5000, true, false, true); + durabilityTest(method, Durability.ASYNC_WAL, Durability.USE_DEFAULT, 5000, true, false, true); + + // expect skip wal cases + durabilityTest(method, Durability.SYNC_WAL, Durability.SKIP_WAL, 0, true, false, false); + durabilityTest(method, Durability.FSYNC_WAL, Durability.SKIP_WAL, 0, true, false, false); + durabilityTest(method, Durability.ASYNC_WAL, Durability.SKIP_WAL, 0, true, false, false); + durabilityTest(method, Durability.SKIP_WAL, Durability.SKIP_WAL, 0, true, false, false); + durabilityTest(method, Durability.USE_DEFAULT, Durability.SKIP_WAL, 0, true, false, false); + durabilityTest(method, Durability.SKIP_WAL, Durability.USE_DEFAULT, 0, true, false, false); + + } + + @SuppressWarnings("unchecked") + private void durabilityTest(String method, Durability tableDurability, + Durability mutationDurability, long timeout, boolean expectAppend, final boolean expectSync, + final boolean expectSyncFromLogSyncer) throws Exception { + Configuration conf = HBaseConfiguration.create(CONF); + method = method + "_" + tableDurability.name() + "_" + mutationDurability.name(); + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + Path logDir = new Path(new Path(dir + method), "log"); + final Configuration walConf = new Configuration(conf); + FSUtils.setRootDir(walConf, logDir); + final WALFactory wals = new WALFactory(walConf, null, UUID.randomUUID().toString()); + final WAL wal = spy(wals.getWAL(tableName.getName(), tableName.getNamespace())); + this.region = initHRegion(tableName, HConstants.EMPTY_START_ROW, + HConstants.EMPTY_END_ROW, method, conf, false, tableDurability, wal, + new byte[][] { family }); + + Put put = new Put(Bytes.toBytes("r1")); + put.add(family, Bytes.toBytes("q1"), Bytes.toBytes("v1")); + put.setDurability(mutationDurability); + region.put(put); + + //verify append called or not + verify(wal, expectAppend ? 
times(1) : never()) + .append((HTableDescriptor) any(), (HRegionInfo) any(), (WALKey) any(), + (WALEdit) any(), Mockito.anyBoolean()); + + // verify sync called or not + if (expectSync || expectSyncFromLogSyncer) { + TEST_UTIL.waitFor(timeout, new Waiter.Predicate() { + @Override + public boolean evaluate() throws Exception { + try { + if (expectSync) { + verify(wal, times(1)).sync(anyLong()); // Hregion calls this one + } else if (expectSyncFromLogSyncer) { + verify(wal, times(1)).sync(); // wal syncer calls this one + } + } catch (Throwable ignore) { + } + return true; + } + }); + } else { + //verify(wal, never()).sync(anyLong()); + verify(wal, never()).sync(); + } + + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + + @Test + public void testRegionReplicaSecondary() throws IOException { + // create a primary region, load some data and flush + // create a secondary region, and do a get against that + Path rootDir = new Path(dir + "testRegionReplicaSecondary"); + FSUtils.setRootDir(TEST_UTIL.getConfiguration(), rootDir); + + byte[][] families = new byte[][] { + Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3") + }; + byte[] cq = Bytes.toBytes("cq"); + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("testRegionReplicaSecondary")); + for (byte[] family : families) { + htd.addFamily(new HColumnDescriptor(family)); + } + + long time = System.currentTimeMillis(); + HRegionInfo primaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 0); + HRegionInfo secondaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 1); + + HRegion primaryRegion = null, secondaryRegion = null; + + try { + primaryRegion = HBaseTestingUtility.createRegionAndWAL(primaryHri, + rootDir, TEST_UTIL.getConfiguration(), htd); + + // load some data + putData(primaryRegion, 0, 1000, cq, families); + + // flush region + primaryRegion.flush(true); + + // open secondary region + secondaryRegion = HRegion.openHRegion(rootDir, secondaryHri, htd, null, CONF); + + verifyData(secondaryRegion, 0, 1000, cq, families); + } finally { + if (primaryRegion != null) { + HBaseTestingUtility.closeRegionAndWAL(primaryRegion); + } + if (secondaryRegion != null) { + HBaseTestingUtility.closeRegionAndWAL(secondaryRegion); + } + } + } + + @Test + public void testRegionReplicaSecondaryIsReadOnly() throws IOException { + // create a primary region, load some data and flush + // create a secondary region, and do a put against that + Path rootDir = new Path(dir + "testRegionReplicaSecondary"); + FSUtils.setRootDir(TEST_UTIL.getConfiguration(), rootDir); + + byte[][] families = new byte[][] { + Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3") + }; + byte[] cq = Bytes.toBytes("cq"); + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("testRegionReplicaSecondary")); + for (byte[] family : families) { + htd.addFamily(new HColumnDescriptor(family)); + } + + long time = System.currentTimeMillis(); + HRegionInfo primaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 0); + HRegionInfo secondaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 1); + + HRegion primaryRegion = null, secondaryRegion = null; + + try { + primaryRegion = HBaseTestingUtility.createRegionAndWAL(primaryHri, + rootDir, TEST_UTIL.getConfiguration(), htd); 
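+      // Note: the secondary replica (replicaId 1) is opened below over the same rootDir as the
+      // primary, so it serves reads straight from the primary's flushed files; since secondary
+      // replicas are read-only, the put against it further down is expected to fail with an
+      // IOException, which is exactly what this test asserts.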
+ + // load some data + putData(primaryRegion, 0, 1000, cq, families); + + // flush region + primaryRegion.flush(true); + + // open secondary region + secondaryRegion = HRegion.openHRegion(rootDir, secondaryHri, htd, null, CONF); + + try { + putData(secondaryRegion, 0, 1000, cq, families); + fail("Should have thrown exception"); + } catch (IOException ex) { + // expected + } + } finally { + if (primaryRegion != null) { + HBaseTestingUtility.closeRegionAndWAL(primaryRegion); + } + if (secondaryRegion != null) { + HBaseTestingUtility.closeRegionAndWAL(secondaryRegion); + } + } + } + + static WALFactory createWALFactory(Configuration conf, Path rootDir) throws IOException { + Configuration confForWAL = new Configuration(conf); + confForWAL.set(HConstants.HBASE_DIR, rootDir.toString()); + return new WALFactory(confForWAL, + Collections.singletonList(new MetricsWAL()), + "hregion-" + RandomStringUtils.randomNumeric(8)); + } + + @Test + public void testCompactionFromPrimary() throws IOException { + Path rootDir = new Path(dir + "testRegionReplicaSecondary"); + FSUtils.setRootDir(TEST_UTIL.getConfiguration(), rootDir); + + byte[][] families = new byte[][] { + Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3") + }; + byte[] cq = Bytes.toBytes("cq"); + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("testRegionReplicaSecondary")); + for (byte[] family : families) { + htd.addFamily(new HColumnDescriptor(family)); + } + + long time = System.currentTimeMillis(); + HRegionInfo primaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 0); + HRegionInfo secondaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 1); + + HRegion primaryRegion = null, secondaryRegion = null; + + try { + primaryRegion = HBaseTestingUtility.createRegionAndWAL(primaryHri, + rootDir, TEST_UTIL.getConfiguration(), htd); + + // load some data + putData(primaryRegion, 0, 1000, cq, families); + + // flush region + primaryRegion.flush(true); + + // open secondary region + secondaryRegion = HRegion.openHRegion(rootDir, secondaryHri, htd, null, CONF); + + // move the file of the primary region to the archive, simulating a compaction + Collection storeFiles = primaryRegion.getStore(families[0]).getStorefiles(); + primaryRegion.getRegionFileSystem().removeStoreFiles(Bytes.toString(families[0]), storeFiles); + Collection storeFileInfos = primaryRegion.getRegionFileSystem().getStoreFiles(families[0]); + Assert.assertTrue(storeFileInfos == null || storeFileInfos.size() == 0); + + verifyData(secondaryRegion, 0, 1000, cq, families); + } finally { + if (primaryRegion != null) { + HBaseTestingUtility.closeRegionAndWAL(primaryRegion); + } + if (secondaryRegion != null) { + HBaseTestingUtility.closeRegionAndWAL(secondaryRegion); + } + } + } + + private void putData(int startRow, int numRows, byte[] qf, byte[]... families) throws IOException { + putData(this.region, startRow, numRows, qf, families); + } + + private void putData(HRegion region, + int startRow, int numRows, byte[] qf, byte[]... families) throws IOException { + putData(region, Durability.SKIP_WAL, startRow, numRows, qf, families); + } + + static void putData(HRegion region, Durability durability, + int startRow, int numRows, byte[] qf, byte[]... 
families) throws IOException { + for (int i = startRow; i < startRow + numRows; i++) { + Put put = new Put(Bytes.toBytes("" + i)); + put.setDurability(durability); + for (byte[] family : families) { + put.add(family, qf, null); + } + region.put(put); + } + } + + static void verifyData(HRegion newReg, int startRow, int numRows, byte[] qf, byte[]... families) + throws IOException { + for (int i = startRow; i < startRow + numRows; i++) { + byte[] row = Bytes.toBytes("" + i); + Get get = new Get(row); + for (byte[] family : families) { + get.addColumn(family, qf); + } + Result result = newReg.get(get); + Cell[] raw = result.rawCells(); + assertEquals(families.length, result.size()); + for (int j = 0; j < families.length; j++) { + assertTrue(CellUtil.matchingRow(raw[j], row)); + assertTrue(CellUtil.matchingFamily(raw[j], families[j])); + assertTrue(CellUtil.matchingQualifier(raw[j], qf)); + } + } + } + + static void assertGet(final HRegion r, final byte[] family, final byte[] k) throws IOException { + // Now I have k, get values out and assert they are as expected. + Get get = new Get(k).addFamily(family).setMaxVersions(); + Cell[] results = r.get(get).rawCells(); + for (int j = 0; j < results.length; j++) { + byte[] tmp = CellUtil.cloneValue(results[j]); + // Row should be equal to value every time. + assertTrue(Bytes.equals(k, tmp)); + } + } + + /* + * Assert first value in the passed region is firstValue. + * + * @param r + * + * @param fs + * + * @param firstValue + * + * @throws IOException + */ + private void assertScan(final HRegion r, final byte[] fs, final byte[] firstValue) + throws IOException { + byte[][] families = { fs }; + Scan scan = new Scan(); + for (int i = 0; i < families.length; i++) + scan.addFamily(families[i]); + InternalScanner s = r.getScanner(scan); + try { + List curVals = new ArrayList(); + boolean first = true; + OUTER_LOOP: while (s.next(curVals)) { + for (Cell kv : curVals) { + byte[] val = CellUtil.cloneValue(kv); + byte[] curval = val; + if (first) { + first = false; + assertTrue(Bytes.compareTo(curval, firstValue) == 0); + } else { + // Not asserting anything. Might as well break. + break OUTER_LOOP; + } + } + } + } finally { + s.close(); + } + } + + /** + * Test that we get the expected flush results back + * @throws IOException + */ + @Test + public void testFlushResult() throws IOException { + String method = name.getMethodName(); + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + + this.region = initHRegion(tableName, method, family); + + // empty memstore, flush doesn't run + HRegion.FlushResult fr = region.flush(true,true); + assertFalse(fr.isFlushSucceeded()); + assertFalse(fr.isCompactionNeeded()); + + // Flush enough files to get up to the threshold, doesn't need compactions + for (int i = 0; i < 2; i++) { + Put put = new Put(tableName.toBytes()).add(family, family, tableName.toBytes()); + region.put(put); + fr = region.flush(true,true); + assertTrue(fr.isFlushSucceeded()); + assertFalse(fr.isCompactionNeeded()); + } + + // Two flushes after the threshold, compactions are needed + for (int i = 0; i < 2; i++) { + Put put = new Put(tableName.toBytes()).add(family, family, tableName.toBytes()); + region.put(put); + fr = region.flush(true,true); + assertTrue(fr.isFlushSucceeded()); + assertTrue(fr.isCompactionNeeded()); + } + } + + private Configuration initSplit() { + // Always compact if there is more than one store file. 
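+    // (note: initSplit() sets these values directly on the shared CONF object and returns it,
+    // rather than working on a private copy)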
+ CONF.setInt("hbase.hstore.compactionThreshold", 2); + + // Make lease timeout longer, lease checks less frequent + CONF.setInt("hbase.master.lease.thread.wakefrequency", 5 * 1000); + + CONF.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 10 * 1000); + + // Increase the amount of time between client retries + CONF.setLong("hbase.client.pause", 15 * 1000); + + // This size should make it so we always split using the addContent + // below. After adding all data, the first region is 1.3M + CONF.setLong(HConstants.HREGION_MAX_FILESIZE, 1024 * 128); + return CONF; + } + + /** + * @param tableName + * @param callingMethod + * @param conf + * @param families + * @throws IOException + * @return A region on which you must call + * {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done. + */ + private static HRegion initHRegion(TableName tableName, String callingMethod, Configuration conf, + byte[]... families) throws IOException { + return initHRegion(tableName, null, null, callingMethod, conf, false, families); + } + + /** + * @param tableName + * @param callingMethod + * @param conf + * @param isReadOnly + * @param families + * @throws IOException + * @return A region on which you must call + * {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done. + */ + private static HRegion initHRegion(TableName tableName, String callingMethod, Configuration conf, + boolean isReadOnly, byte[]... families) throws IOException { + return initHRegion(tableName, null, null, callingMethod, conf, isReadOnly, families); + } + + public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, + String callingMethod, Configuration conf, boolean isReadOnly, byte[]... families) + throws IOException { + Path logDir = TEST_UTIL.getDataTestDirOnTestFS(callingMethod + ".log"); + HRegionInfo hri = new HRegionInfo(tableName, startKey, stopKey); + final WAL wal = HBaseTestingUtility.createWal(conf, logDir, hri); + return initHRegion(tableName, startKey, stopKey, callingMethod, conf, isReadOnly, + Durability.SYNC_WAL, wal, families); + } + + /** + * @param tableName + * @param startKey + * @param stopKey + * @param callingMethod + * @param conf + * @param isReadOnly + * @param families + * @throws IOException + * @return A region on which you must call + * {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done. + */ + public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, + String callingMethod, Configuration conf, boolean isReadOnly, Durability durability, + WAL wal, byte[]... families) throws IOException { + boolean[] inMemory = new boolean[families.length]; + for(int i = 0; i < inMemory.length; i++) { + inMemory[i] = true; + } + return TEST_UTIL.createLocalHRegionWithInMemoryFlags(tableName, startKey, stopKey, + isReadOnly, durability, wal, inMemory, families); + } + + /** + * Assert that the passed in Cell has expected contents for the specified row, + * column & timestamp. 
+ */ + private void checkOneCell(Cell kv, byte[] cf, int rowIdx, int colIdx, long ts) { + String ctx = "rowIdx=" + rowIdx + "; colIdx=" + colIdx + "; ts=" + ts; + assertEquals("Row mismatch which checking: " + ctx, "row:" + rowIdx, + Bytes.toString(CellUtil.cloneRow(kv))); + assertEquals("ColumnFamily mismatch while checking: " + ctx, Bytes.toString(cf), + Bytes.toString(CellUtil.cloneFamily(kv))); + assertEquals("Column qualifier mismatch while checking: " + ctx, "column:" + colIdx, + Bytes.toString(CellUtil.cloneQualifier(kv))); + assertEquals("Timestamp mismatch while checking: " + ctx, ts, kv.getTimestamp()); + assertEquals("Value mismatch while checking: " + ctx, "value-version-" + ts, + Bytes.toString(CellUtil.cloneValue(kv))); + } + + @Test (timeout=60000) + public void testReverseScanner_FromMemStore_SingleCF_Normal() + throws IOException { + byte[] rowC = Bytes.toBytes("rowC"); + byte[] rowA = Bytes.toBytes("rowA"); + byte[] rowB = Bytes.toBytes("rowB"); + byte[] cf = Bytes.toBytes("CF"); + byte[][] families = { cf }; + byte[] col = Bytes.toBytes("C"); + long ts = 1; + String method = this.getName(); + this.region = initHRegion(tableName, method, families); + try { + KeyValue kv1 = new KeyValue(rowC, cf, col, ts, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(rowC, cf, col, ts + 1, KeyValue.Type.Put, + null); + KeyValue kv2 = new KeyValue(rowA, cf, col, ts, KeyValue.Type.Put, null); + KeyValue kv3 = new KeyValue(rowB, cf, col, ts, KeyValue.Type.Put, null); + Put put = null; + put = new Put(rowC); + put.add(kv1); + put.add(kv11); + region.put(put); + put = new Put(rowA); + put.add(kv2); + region.put(put); + put = new Put(rowB); + put.add(kv3); + region.put(put); + + Scan scan = new Scan(rowC); + scan.setMaxVersions(5); + scan.setReversed(true); + InternalScanner scanner = region.getScanner(scan); + List currRow = new ArrayList(); + boolean hasNext = scanner.next(currRow); + assertEquals(2, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowC, 0, rowC.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowB, 0, rowB.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowA, 0, rowA.length)); + assertFalse(hasNext); + scanner.close(); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test (timeout=60000) + public void testReverseScanner_FromMemStore_SingleCF_LargerKey() + throws IOException { + byte[] rowC = Bytes.toBytes("rowC"); + byte[] rowA = Bytes.toBytes("rowA"); + byte[] rowB = Bytes.toBytes("rowB"); + byte[] rowD = Bytes.toBytes("rowD"); + byte[] cf = Bytes.toBytes("CF"); + byte[][] families = { cf }; + byte[] col = Bytes.toBytes("C"); + long ts = 1; + String method = this.getName(); + this.region = initHRegion(tableName, method, families); + try { + KeyValue kv1 = new KeyValue(rowC, cf, col, ts, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(rowC, cf, col, ts + 1, KeyValue.Type.Put, + null); + KeyValue kv2 = new KeyValue(rowA, cf, col, ts, KeyValue.Type.Put, null); + KeyValue kv3 = new KeyValue(rowB, cf, col, ts, KeyValue.Type.Put, null); + 
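+      // rowD is larger than any row actually written (rowA..rowC), so the reverse scan that
+      // starts from rowD below is expected to begin returning results at rowC.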
Put put = null; + put = new Put(rowC); + put.add(kv1); + put.add(kv11); + region.put(put); + put = new Put(rowA); + put.add(kv2); + region.put(put); + put = new Put(rowB); + put.add(kv3); + region.put(put); + + Scan scan = new Scan(rowD); + List currRow = new ArrayList(); + scan.setReversed(true); + scan.setMaxVersions(5); + InternalScanner scanner = region.getScanner(scan); + boolean hasNext = scanner.next(currRow); + assertEquals(2, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowC, 0, rowC.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowB, 0, rowB.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowA, 0, rowA.length)); + assertFalse(hasNext); + scanner.close(); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test (timeout=60000) + public void testReverseScanner_FromMemStore_SingleCF_FullScan() + throws IOException { + byte[] rowC = Bytes.toBytes("rowC"); + byte[] rowA = Bytes.toBytes("rowA"); + byte[] rowB = Bytes.toBytes("rowB"); + byte[] cf = Bytes.toBytes("CF"); + byte[][] families = { cf }; + byte[] col = Bytes.toBytes("C"); + long ts = 1; + String method = this.getName(); + this.region = initHRegion(tableName, method, families); + try { + KeyValue kv1 = new KeyValue(rowC, cf, col, ts, KeyValue.Type.Put, null); + KeyValue kv11 = new KeyValue(rowC, cf, col, ts + 1, KeyValue.Type.Put, + null); + KeyValue kv2 = new KeyValue(rowA, cf, col, ts, KeyValue.Type.Put, null); + KeyValue kv3 = new KeyValue(rowB, cf, col, ts, KeyValue.Type.Put, null); + Put put = null; + put = new Put(rowC); + put.add(kv1); + put.add(kv11); + region.put(put); + put = new Put(rowA); + put.add(kv2); + region.put(put); + put = new Put(rowB); + put.add(kv3); + region.put(put); + Scan scan = new Scan(); + List currRow = new ArrayList(); + scan.setReversed(true); + InternalScanner scanner = region.getScanner(scan); + boolean hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowC, 0, rowC.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowB, 0, rowB.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowA, 0, rowA.length)); + assertFalse(hasNext); + scanner.close(); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test (timeout=60000) + public void testReverseScanner_moreRowsMayExistAfter() throws IOException { + // case for "INCLUDE_AND_SEEK_NEXT_ROW & SEEK_NEXT_ROW" endless loop + byte[] rowA = Bytes.toBytes("rowA"); + byte[] rowB = Bytes.toBytes("rowB"); + byte[] rowC = Bytes.toBytes("rowC"); + byte[] rowD = 
Bytes.toBytes("rowD"); + byte[] rowE = Bytes.toBytes("rowE"); + byte[] cf = Bytes.toBytes("CF"); + byte[][] families = { cf }; + byte[] col1 = Bytes.toBytes("col1"); + byte[] col2 = Bytes.toBytes("col2"); + long ts = 1; + String method = this.getName(); + this.region = initHRegion(tableName, method, families); + try { + KeyValue kv1 = new KeyValue(rowA, cf, col1, ts, KeyValue.Type.Put, null); + KeyValue kv2 = new KeyValue(rowB, cf, col1, ts, KeyValue.Type.Put, null); + KeyValue kv3 = new KeyValue(rowC, cf, col1, ts, KeyValue.Type.Put, null); + KeyValue kv4_1 = new KeyValue(rowD, cf, col1, ts, KeyValue.Type.Put, null); + KeyValue kv4_2 = new KeyValue(rowD, cf, col2, ts, KeyValue.Type.Put, null); + KeyValue kv5 = new KeyValue(rowE, cf, col1, ts, KeyValue.Type.Put, null); + Put put = null; + put = new Put(rowA); + put.add(kv1); + region.put(put); + put = new Put(rowB); + put.add(kv2); + region.put(put); + put = new Put(rowC); + put.add(kv3); + region.put(put); + put = new Put(rowD); + put.add(kv4_1); + region.put(put); + put = new Put(rowD); + put.add(kv4_2); + region.put(put); + put = new Put(rowE); + put.add(kv5); + region.put(put); + region.flush(true); + Scan scan = new Scan(rowD, rowA); + scan.addColumn(families[0], col1); + scan.setReversed(true); + List currRow = new ArrayList(); + InternalScanner scanner = region.getScanner(scan); + boolean hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowD, 0, rowD.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowC, 0, rowC.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowB, 0, rowB.length)); + assertFalse(hasNext); + scanner.close(); + + scan = new Scan(rowD, rowA); + scan.addColumn(families[0], col2); + scan.setReversed(true); + currRow.clear(); + scanner = region.getScanner(scan); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowD, 0, rowD.length)); + scanner.close(); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test (timeout=60000) + public void testReverseScanner_smaller_blocksize() throws IOException { + // case to ensure no conflict with HFile index optimization + byte[] rowA = Bytes.toBytes("rowA"); + byte[] rowB = Bytes.toBytes("rowB"); + byte[] rowC = Bytes.toBytes("rowC"); + byte[] rowD = Bytes.toBytes("rowD"); + byte[] rowE = Bytes.toBytes("rowE"); + byte[] cf = Bytes.toBytes("CF"); + byte[][] families = { cf }; + byte[] col1 = Bytes.toBytes("col1"); + byte[] col2 = Bytes.toBytes("col2"); + long ts = 1; + String method = this.getName(); + HBaseConfiguration config = new HBaseConfiguration(); + config.setInt("test.block.size", 1); + this.region = initHRegion(tableName, method, config, families); + try { + KeyValue kv1 = new KeyValue(rowA, cf, col1, ts, KeyValue.Type.Put, null); + KeyValue kv2 = new KeyValue(rowB, cf, col1, ts, KeyValue.Type.Put, null); + KeyValue kv3 = new KeyValue(rowC, cf, col1, ts, 
KeyValue.Type.Put, null); + KeyValue kv4_1 = new KeyValue(rowD, cf, col1, ts, KeyValue.Type.Put, null); + KeyValue kv4_2 = new KeyValue(rowD, cf, col2, ts, KeyValue.Type.Put, null); + KeyValue kv5 = new KeyValue(rowE, cf, col1, ts, KeyValue.Type.Put, null); + Put put = null; + put = new Put(rowA); + put.add(kv1); + region.put(put); + put = new Put(rowB); + put.add(kv2); + region.put(put); + put = new Put(rowC); + put.add(kv3); + region.put(put); + put = new Put(rowD); + put.add(kv4_1); + region.put(put); + put = new Put(rowD); + put.add(kv4_2); + region.put(put); + put = new Put(rowE); + put.add(kv5); + region.put(put); + region.flush(true); + Scan scan = new Scan(rowD, rowA); + scan.addColumn(families[0], col1); + scan.setReversed(true); + List currRow = new ArrayList(); + InternalScanner scanner = region.getScanner(scan); + boolean hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowD, 0, rowD.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowC, 0, rowC.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowB, 0, rowB.length)); + assertFalse(hasNext); + scanner.close(); + + scan = new Scan(rowD, rowA); + scan.addColumn(families[0], col2); + scan.setReversed(true); + currRow.clear(); + scanner = region.getScanner(scan); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), rowD, 0, rowD.length)); + scanner.close(); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test (timeout=60000) + public void testReverseScanner_FromMemStoreAndHFiles_MultiCFs1() + throws IOException { + byte[] row0 = Bytes.toBytes("row0"); // 1 kv + byte[] row1 = Bytes.toBytes("row1"); // 2 kv + byte[] row2 = Bytes.toBytes("row2"); // 4 kv + byte[] row3 = Bytes.toBytes("row3"); // 2 kv + byte[] row4 = Bytes.toBytes("row4"); // 5 kv + byte[] row5 = Bytes.toBytes("row5"); // 2 kv + byte[] cf1 = Bytes.toBytes("CF1"); + byte[] cf2 = Bytes.toBytes("CF2"); + byte[] cf3 = Bytes.toBytes("CF3"); + byte[][] families = { cf1, cf2, cf3 }; + byte[] col = Bytes.toBytes("C"); + long ts = 1; + String method = this.getName(); + HBaseConfiguration conf = new HBaseConfiguration(); + // disable compactions in this test. 
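+    // (an artificially high compactionThreshold keeps the flushed files separate, so the reverse
+    // scan below really does have to merge results from several HFiles plus the memstore)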
+ conf.setInt("hbase.hstore.compactionThreshold", 10000); + this.region = initHRegion(tableName, method, conf, families); + try { + // kv naming style: kv(row number) totalKvCountInThisRow seq no + KeyValue kv0_1_1 = new KeyValue(row0, cf1, col, ts, KeyValue.Type.Put, + null); + KeyValue kv1_2_1 = new KeyValue(row1, cf2, col, ts, KeyValue.Type.Put, + null); + KeyValue kv1_2_2 = new KeyValue(row1, cf1, col, ts + 1, + KeyValue.Type.Put, null); + KeyValue kv2_4_1 = new KeyValue(row2, cf2, col, ts, KeyValue.Type.Put, + null); + KeyValue kv2_4_2 = new KeyValue(row2, cf1, col, ts, KeyValue.Type.Put, + null); + KeyValue kv2_4_3 = new KeyValue(row2, cf3, col, ts, KeyValue.Type.Put, + null); + KeyValue kv2_4_4 = new KeyValue(row2, cf1, col, ts + 4, + KeyValue.Type.Put, null); + KeyValue kv3_2_1 = new KeyValue(row3, cf2, col, ts, KeyValue.Type.Put, + null); + KeyValue kv3_2_2 = new KeyValue(row3, cf1, col, ts + 4, + KeyValue.Type.Put, null); + KeyValue kv4_5_1 = new KeyValue(row4, cf1, col, ts, KeyValue.Type.Put, + null); + KeyValue kv4_5_2 = new KeyValue(row4, cf3, col, ts, KeyValue.Type.Put, + null); + KeyValue kv4_5_3 = new KeyValue(row4, cf3, col, ts + 5, + KeyValue.Type.Put, null); + KeyValue kv4_5_4 = new KeyValue(row4, cf2, col, ts, KeyValue.Type.Put, + null); + KeyValue kv4_5_5 = new KeyValue(row4, cf1, col, ts + 3, + KeyValue.Type.Put, null); + KeyValue kv5_2_1 = new KeyValue(row5, cf2, col, ts, KeyValue.Type.Put, + null); + KeyValue kv5_2_2 = new KeyValue(row5, cf3, col, ts, KeyValue.Type.Put, + null); + // hfiles(cf1/cf2) :"row1"(1 kv) / "row2"(1 kv) / "row4"(2 kv) + Put put = null; + put = new Put(row1); + put.add(kv1_2_1); + region.put(put); + put = new Put(row2); + put.add(kv2_4_1); + region.put(put); + put = new Put(row4); + put.add(kv4_5_4); + put.add(kv4_5_5); + region.put(put); + region.flush(true); + // hfiles(cf1/cf3) : "row1" (1 kvs) / "row2" (1 kv) / "row4" (2 kv) + put = new Put(row4); + put.add(kv4_5_1); + put.add(kv4_5_3); + region.put(put); + put = new Put(row1); + put.add(kv1_2_2); + region.put(put); + put = new Put(row2); + put.add(kv2_4_4); + region.put(put); + region.flush(true); + // hfiles(cf1/cf3) : "row2"(2 kv) / "row3"(1 kvs) / "row4" (1 kv) + put = new Put(row4); + put.add(kv4_5_2); + region.put(put); + put = new Put(row2); + put.add(kv2_4_2); + put.add(kv2_4_3); + region.put(put); + put = new Put(row3); + put.add(kv3_2_2); + region.put(put); + region.flush(true); + // memstore(cf1/cf2/cf3) : "row0" (1 kvs) / "row3" ( 1 kv) / "row5" (max) + // ( 2 kv) + put = new Put(row0); + put.add(kv0_1_1); + region.put(put); + put = new Put(row3); + put.add(kv3_2_1); + region.put(put); + put = new Put(row5); + put.add(kv5_2_1); + put.add(kv5_2_2); + region.put(put); + // scan range = ["row4", min), skip the max "row5" + Scan scan = new Scan(row4); + scan.setMaxVersions(5); + scan.setBatch(3); + scan.setReversed(true); + InternalScanner scanner = region.getScanner(scan); + List currRow = new ArrayList(); + boolean hasNext = false; + // 1. 
scan out "row4" (5 kvs), "row5" can't be scanned out since not + // included in scan range + // "row4" takes 2 next() calls since batch=3 + hasNext = scanner.next(currRow); + assertEquals(3, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row4, 0, row4.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(2, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow.get(0).getRowLength(), row4, 0, + row4.length)); + assertTrue(hasNext); + // 2. scan out "row3" (2 kv) + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(2, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row3, 0, row3.length)); + assertTrue(hasNext); + // 3. scan out "row2" (4 kvs) + // "row2" takes 2 next() calls since batch=3 + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(3, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row2, 0, row2.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row2, 0, row2.length)); + assertTrue(hasNext); + // 4. scan out "row1" (2 kv) + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(2, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row1, 0, row1.length)); + assertTrue(hasNext); + // 5. scan out "row0" (1 kv) + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row0, 0, row0.length)); + assertFalse(hasNext); + + scanner.close(); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test (timeout=60000) + public void testReverseScanner_FromMemStoreAndHFiles_MultiCFs2() + throws IOException { + byte[] row1 = Bytes.toBytes("row1"); + byte[] row2 = Bytes.toBytes("row2"); + byte[] row3 = Bytes.toBytes("row3"); + byte[] row4 = Bytes.toBytes("row4"); + byte[] cf1 = Bytes.toBytes("CF1"); + byte[] cf2 = Bytes.toBytes("CF2"); + byte[] cf3 = Bytes.toBytes("CF3"); + byte[] cf4 = Bytes.toBytes("CF4"); + byte[][] families = { cf1, cf2, cf3, cf4 }; + byte[] col = Bytes.toBytes("C"); + long ts = 1; + String method = this.getName(); + HBaseConfiguration conf = new HBaseConfiguration(); + // disable compactions in this test. 
+ conf.setInt("hbase.hstore.compactionThreshold", 10000); + this.region = initHRegion(tableName, method, conf, families); + try { + KeyValue kv1 = new KeyValue(row1, cf1, col, ts, KeyValue.Type.Put, null); + KeyValue kv2 = new KeyValue(row2, cf2, col, ts, KeyValue.Type.Put, null); + KeyValue kv3 = new KeyValue(row3, cf3, col, ts, KeyValue.Type.Put, null); + KeyValue kv4 = new KeyValue(row4, cf4, col, ts, KeyValue.Type.Put, null); + // storefile1 + Put put = new Put(row1); + put.add(kv1); + region.put(put); + region.flush(true); + // storefile2 + put = new Put(row2); + put.add(kv2); + region.put(put); + region.flush(true); + // storefile3 + put = new Put(row3); + put.add(kv3); + region.put(put); + region.flush(true); + // memstore + put = new Put(row4); + put.add(kv4); + region.put(put); + // scan range = ["row4", min) + Scan scan = new Scan(row4); + scan.setReversed(true); + scan.setBatch(10); + InternalScanner scanner = region.getScanner(scan); + List currRow = new ArrayList(); + boolean hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row4, 0, row4.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row3, 0, row3.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row2, 0, row2.length)); + assertTrue(hasNext); + currRow.clear(); + hasNext = scanner.next(currRow); + assertEquals(1, currRow.size()); + assertTrue(Bytes.equals(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), currRow + .get(0).getRowLength(), row1, 0, row1.length)); + assertFalse(hasNext); + } finally { + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + } + + @Test (timeout=60000) + public void testSplitRegionWithReverseScan() throws IOException { + TableName tableName = TableName.valueOf("testSplitRegionWithReverseScan"); + byte [] qualifier = Bytes.toBytes("qualifier"); + Configuration hc = initSplit(); + int numRows = 3; + byte [][] families = {fam1}; + + //Setting up region + String method = this.getName(); + this.region = initHRegion(tableName, method, hc, families); + + //Put data in region + int startRow = 100; + putData(startRow, numRows, qualifier, families); + int splitRow = startRow + numRows; + putData(splitRow, numRows, qualifier, families); + int endRow = splitRow + numRows; + region.flush(true); + + HRegion [] regions = null; + try { + regions = splitRegion(region, Bytes.toBytes("" + splitRow)); + //Opening the regions returned. 
+ for (int i = 0; i < regions.length; i++) { + regions[i] = HRegion.openHRegion(regions[i], null); + } + //Verifying that the region has been split + assertEquals(2, regions.length); + + //Verifying that all data is still there and that data is in the right + //place + verifyData(regions[0], startRow, numRows, qualifier, families); + verifyData(regions[1], splitRow, numRows, qualifier, families); + + //fire the reverse scan1: top range, and larger than the last row + Scan scan = new Scan(Bytes.toBytes(String.valueOf(startRow + 10 * numRows))); + scan.setReversed(true); + InternalScanner scanner = regions[1].getScanner(scan); + List currRow = new ArrayList(); + boolean more = false; + int verify = startRow + 2 * numRows - 1; + do { + more = scanner.next(currRow); + assertEquals(Bytes.toString(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), + currRow.get(0).getRowLength()), verify + ""); + verify--; + currRow.clear(); + } while(more); + assertEquals(verify, startRow + numRows - 1); + scanner.close(); + //fire the reverse scan2: top range, and equals to the last row + scan = new Scan(Bytes.toBytes(String.valueOf(startRow + 2 * numRows - 1))); + scan.setReversed(true); + scanner = regions[1].getScanner(scan); + verify = startRow + 2 * numRows - 1; + do { + more = scanner.next(currRow); + assertEquals(Bytes.toString(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), + currRow.get(0).getRowLength()), verify + ""); + verify--; + currRow.clear(); + } while(more); + assertEquals(verify, startRow + numRows - 1); + scanner.close(); + //fire the reverse scan3: bottom range, and larger than the last row + scan = new Scan(Bytes.toBytes(String.valueOf(startRow + numRows))); + scan.setReversed(true); + scanner = regions[0].getScanner(scan); + verify = startRow + numRows - 1; + do { + more = scanner.next(currRow); + assertEquals(Bytes.toString(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), + currRow.get(0).getRowLength()), verify + ""); + verify--; + currRow.clear(); + } while(more); + assertEquals(verify, 99); + scanner.close(); + //fire the reverse scan4: bottom range, and equals to the last row + scan = new Scan(Bytes.toBytes(String.valueOf(startRow + numRows - 1))); + scan.setReversed(true); + scanner = regions[0].getScanner(scan); + verify = startRow + numRows - 1; + do { + more = scanner.next(currRow); + assertEquals(Bytes.toString(currRow.get(0).getRowArray(), currRow.get(0).getRowOffset(), + currRow.get(0).getRowLength()), verify + ""); + verify--; + currRow.clear(); + } while(more); + assertEquals(verify, startRow - 1); + scanner.close(); + } finally { + this.region.close(); + this.region = null; + } + } + + @Test + public void testWriteRequestsCounter() throws IOException { + byte[] fam = Bytes.toBytes("info"); + byte[][] families = { fam }; + this.region = initHRegion(tableName, method, CONF, families); + + Assert.assertEquals(0L, region.getWriteRequestsCount()); + + Put put = new Put(row); + put.add(fam, fam, fam); + + Assert.assertEquals(0L, region.getWriteRequestsCount()); + region.put(put); + Assert.assertEquals(1L, region.getWriteRequestsCount()); + region.put(put); + Assert.assertEquals(2L, region.getWriteRequestsCount()); + region.put(put); + Assert.assertEquals(3L, region.getWriteRequestsCount()); + + region.delete(new Delete(row)); + Assert.assertEquals(4L, region.getWriteRequestsCount()); + + HBaseTestingUtility.closeRegionAndWAL(this.region); + this.region = null; + } + + @Test + @SuppressWarnings("unchecked") + public void 
testOpenRegionWrittenToWAL() throws Exception { + final ServerName serverName = ServerName.valueOf("testOpenRegionWrittenToWAL", 100, 42); + final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName)); + + HTableDescriptor htd + = new HTableDescriptor(TableName.valueOf("testOpenRegionWrittenToWAL")); + htd.addFamily(new HColumnDescriptor(fam1)); + htd.addFamily(new HColumnDescriptor(fam2)); + + HRegionInfo hri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY); + + // open the region w/o rss and wal and flush some files + HRegion region = + HBaseTestingUtility.createRegionAndWAL(hri, TEST_UTIL.getDataTestDir(), TEST_UTIL + .getConfiguration(), htd); + assertNotNull(region); + + // create a file in fam1 for the region before opening in OpenRegionHandler + region.put(new Put(Bytes.toBytes("a")).add(fam1, fam1, fam1)); + region.flush(true); + HBaseTestingUtility.closeRegionAndWAL(region); + + ArgumentCaptor editCaptor = ArgumentCaptor.forClass(WALEdit.class); + + // capture append() calls + WAL wal = mock(WAL.class); + when(rss.getWAL((HRegionInfo) any())).thenReturn(wal); + + try { + region = HRegion.openHRegion(hri, htd, rss.getWAL(hri), + TEST_UTIL.getConfiguration(), rss, null); + + verify(wal, times(1)).append((HTableDescriptor)any(), (HRegionInfo)any(), (WALKey)any() + , editCaptor.capture(), anyBoolean()); + + WALEdit edit = editCaptor.getValue(); + assertNotNull(edit); + assertNotNull(edit.getCells()); + assertEquals(1, edit.getCells().size()); + RegionEventDescriptor desc = WALEdit.getRegionEventDescriptor(edit.getCells().get(0)); + assertNotNull(desc); + + LOG.info("RegionEventDescriptor from WAL: " + desc); + + assertEquals(RegionEventDescriptor.EventType.REGION_OPEN, desc.getEventType()); + assertTrue(Bytes.equals(desc.getTableName().toByteArray(), htd.getName())); + assertTrue(Bytes.equals(desc.getEncodedRegionName().toByteArray(), + hri.getEncodedNameAsBytes())); + assertTrue(desc.getLogSequenceNumber() > 0); + assertEquals(serverName, ProtobufUtil.toServerName(desc.getServer())); + assertEquals(2, desc.getStoresCount()); + + StoreDescriptor store = desc.getStores(0); + assertTrue(Bytes.equals(store.getFamilyName().toByteArray(), fam1)); + assertEquals(store.getStoreHomeDir(), Bytes.toString(fam1)); + assertEquals(1, store.getStoreFileCount()); // 1store file + assertFalse(store.getStoreFile(0).contains("/")); // ensure path is relative + + store = desc.getStores(1); + assertTrue(Bytes.equals(store.getFamilyName().toByteArray(), fam2)); + assertEquals(store.getStoreHomeDir(), Bytes.toString(fam2)); + assertEquals(0, store.getStoreFileCount()); // no store files + + } finally { + HBaseTestingUtility.closeRegionAndWAL(region); + } + } + + // Helper for test testOpenRegionWrittenToWALForLogReplay + static class HRegionWithSeqId extends HRegion { + public HRegionWithSeqId(final Path tableDir, final WAL wal, final FileSystem fs, + final Configuration confParam, final HRegionInfo regionInfo, + final HTableDescriptor htd, final RegionServerServices rsServices) { + super(tableDir, wal, fs, confParam, regionInfo, htd, rsServices); + } + @Override + protected long getNextSequenceId(WAL wal) throws IOException { + return 42; + } + } + + @Test + public void testFlushedFileWithNoTags() throws Exception { + TableName tableName = TableName.valueOf(getClass().getSimpleName()); + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.addFamily(new HColumnDescriptor(fam1)); + HRegionInfo info = new 
HRegionInfo(tableName, null, null, false); + Path path = TEST_UTIL.getDataTestDir(getClass().getSimpleName()); + region = HBaseTestingUtility.createRegionAndWAL(info, path, TEST_UTIL.getConfiguration(), htd); + Put put = new Put(Bytes.toBytes("a-b-0-0")); + put.addColumn(fam1, qual1, Bytes.toBytes("c1-value")); + region.put(put); + region.flush(true); + Store store = region.getStore(fam1); + Collection storefiles = store.getStorefiles(); + for (StoreFile sf : storefiles) { + assertFalse("Tags should not be present " + ,sf.getReader().getHFileReader().getFileContext().isIncludesTags()); + } + } + @Test + @SuppressWarnings("unchecked") + public void testOpenRegionWrittenToWALForLogReplay() throws Exception { + // similar to the above test but with distributed log replay + final ServerName serverName = ServerName.valueOf("testOpenRegionWrittenToWALForLogReplay", + 100, 42); + final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName)); + + HTableDescriptor htd + = new HTableDescriptor(TableName.valueOf("testOpenRegionWrittenToWALForLogReplay")); + htd.addFamily(new HColumnDescriptor(fam1)); + htd.addFamily(new HColumnDescriptor(fam2)); + + HRegionInfo hri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY); + + // open the region w/o rss and wal and flush some files + HRegion region = + HBaseTestingUtility.createRegionAndWAL(hri, TEST_UTIL.getDataTestDir(), TEST_UTIL + .getConfiguration(), htd); + assertNotNull(region); + + // create a file in fam1 for the region before opening in OpenRegionHandler + region.put(new Put(Bytes.toBytes("a")).add(fam1, fam1, fam1)); + region.flush(true); + HBaseTestingUtility.closeRegionAndWAL(region); + + ArgumentCaptor editCaptor = ArgumentCaptor.forClass(WALEdit.class); + + // capture append() calls + WAL wal = mock(WAL.class); + when(rss.getWAL((HRegionInfo) any())).thenReturn(wal); + + // add the region to recovering regions + HashMap recoveringRegions = Maps.newHashMap(); + recoveringRegions.put(region.getRegionInfo().getEncodedName(), null); + when(rss.getRecoveringRegions()).thenReturn(recoveringRegions); + + try { + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + conf.set(HConstants.REGION_IMPL, HRegionWithSeqId.class.getName()); + region = HRegion.openHRegion(hri, htd, rss.getWAL(hri), + conf, rss, null); + + // verify that we have not appended region open event to WAL because this region is still + // recovering + verify(wal, times(0)).append((HTableDescriptor)any(), (HRegionInfo)any(), (WALKey)any() + , editCaptor.capture(), anyBoolean()); + + // not put the region out of recovering state + new FinishRegionRecoveringHandler(rss, region.getRegionInfo().getEncodedName(), "/foo") + .prepare().process(); + + // now we should have put the entry + verify(wal, times(1)).append((HTableDescriptor)any(), (HRegionInfo)any(), (WALKey)any() + , editCaptor.capture(), anyBoolean()); + + WALEdit edit = editCaptor.getValue(); + assertNotNull(edit); + assertNotNull(edit.getCells()); + assertEquals(1, edit.getCells().size()); + RegionEventDescriptor desc = WALEdit.getRegionEventDescriptor(edit.getCells().get(0)); + assertNotNull(desc); + + LOG.info("RegionEventDescriptor from WAL: " + desc); + + assertEquals(RegionEventDescriptor.EventType.REGION_OPEN, desc.getEventType()); + assertTrue(Bytes.equals(desc.getTableName().toByteArray(), htd.getName())); + assertTrue(Bytes.equals(desc.getEncodedRegionName().toByteArray(), + hri.getEncodedNameAsBytes())); + 
assertTrue(desc.getLogSequenceNumber() > 0); + assertEquals(serverName, ProtobufUtil.toServerName(desc.getServer())); + assertEquals(2, desc.getStoresCount()); + + StoreDescriptor store = desc.getStores(0); + assertTrue(Bytes.equals(store.getFamilyName().toByteArray(), fam1)); + assertEquals(store.getStoreHomeDir(), Bytes.toString(fam1)); + assertEquals(1, store.getStoreFileCount()); // 1store file + assertFalse(store.getStoreFile(0).contains("/")); // ensure path is relative + + store = desc.getStores(1); + assertTrue(Bytes.equals(store.getFamilyName().toByteArray(), fam2)); + assertEquals(store.getStoreHomeDir(), Bytes.toString(fam2)); + assertEquals(0, store.getStoreFileCount()); // no store files + + } finally { + HBaseTestingUtility.closeRegionAndWAL(region); + } + } + + @Test + @SuppressWarnings("unchecked") + public void testCloseRegionWrittenToWAL() throws Exception { + final ServerName serverName = ServerName.valueOf("testCloseRegionWrittenToWAL", 100, 42); + final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName)); + + HTableDescriptor htd + = new HTableDescriptor(TableName.valueOf("testOpenRegionWrittenToWAL")); + htd.addFamily(new HColumnDescriptor(fam1)); + htd.addFamily(new HColumnDescriptor(fam2)); + + HRegionInfo hri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY); + + ArgumentCaptor editCaptor = ArgumentCaptor.forClass(WALEdit.class); + + // capture append() calls + WAL wal = mock(WAL.class); + when(rss.getWAL((HRegionInfo) any())).thenReturn(wal); + + // open a region first so that it can be closed later + region = HRegion.openHRegion(hri, htd, rss.getWAL(hri), + TEST_UTIL.getConfiguration(), rss, null); + + // close the region + region.close(false); + + // 2 times, one for region open, the other close region + verify(wal, times(2)).append((HTableDescriptor)any(), (HRegionInfo)any(), (WALKey)any(), + editCaptor.capture(), anyBoolean()); + + WALEdit edit = editCaptor.getAllValues().get(1); + assertNotNull(edit); + assertNotNull(edit.getCells()); + assertEquals(1, edit.getCells().size()); + RegionEventDescriptor desc = WALEdit.getRegionEventDescriptor(edit.getCells().get(0)); + assertNotNull(desc); + + LOG.info("RegionEventDescriptor from WAL: " + desc); + + assertEquals(RegionEventDescriptor.EventType.REGION_CLOSE, desc.getEventType()); + assertTrue(Bytes.equals(desc.getTableName().toByteArray(), htd.getName())); + assertTrue(Bytes.equals(desc.getEncodedRegionName().toByteArray(), + hri.getEncodedNameAsBytes())); + assertTrue(desc.getLogSequenceNumber() > 0); + assertEquals(serverName, ProtobufUtil.toServerName(desc.getServer())); + assertEquals(2, desc.getStoresCount()); + + StoreDescriptor store = desc.getStores(0); + assertTrue(Bytes.equals(store.getFamilyName().toByteArray(), fam1)); + assertEquals(store.getStoreHomeDir(), Bytes.toString(fam1)); + assertEquals(0, store.getStoreFileCount()); // no store files + + store = desc.getStores(1); + assertTrue(Bytes.equals(store.getFamilyName().toByteArray(), fam2)); + assertEquals(store.getStoreHomeDir(), Bytes.toString(fam2)); + assertEquals(0, store.getStoreFileCount()); // no store files + } + + /** + * Test RegionTooBusyException thrown when region is busy + */ + @Test (timeout=24000) + public void testRegionTooBusy() throws IOException { + String method = "testRegionTooBusy"; + TableName tableName = TableName.valueOf(method); + byte[] family = Bytes.toBytes("family"); + long defaultBusyWaitDuration = CONF.getLong("hbase.busy.wait.duration", 
+ HRegion.DEFAULT_BUSY_WAIT_DURATION); + CONF.setLong("hbase.busy.wait.duration", 1000); + region = initHRegion(tableName, method, CONF, family); + final AtomicBoolean stopped = new AtomicBoolean(true); + Thread t = new Thread(new Runnable() { + @Override + public void run() { + try { + region.lock.writeLock().lock(); + stopped.set(false); + while (!stopped.get()) { + Thread.sleep(100); + } + } catch (InterruptedException ie) { + } finally { + region.lock.writeLock().unlock(); + } + } + }); + t.start(); + Get get = new Get(row); + try { + while (stopped.get()) { + Thread.sleep(100); + } + region.get(get); + fail("Should throw RegionTooBusyException"); + } catch (InterruptedException ie) { + fail("test interrupted"); + } catch (RegionTooBusyException e) { + // Good, expected + } finally { + stopped.set(true); + try { + t.join(); + } catch (Throwable e) { + } + + HBaseTestingUtility.closeRegionAndWAL(region); + region = null; + CONF.setLong("hbase.busy.wait.duration", defaultBusyWaitDuration); + } + } + + @Test + public void testCellTTLs() throws IOException { + IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(); + EnvironmentEdgeManager.injectEdge(edge); + + final byte[] row = Bytes.toBytes("testRow"); + final byte[] q1 = Bytes.toBytes("q1"); + final byte[] q2 = Bytes.toBytes("q2"); + final byte[] q3 = Bytes.toBytes("q3"); + final byte[] q4 = Bytes.toBytes("q4"); + + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("testCellTTLs")); + HColumnDescriptor hcd = new HColumnDescriptor(fam1); + hcd.setTimeToLive(10); // 10 seconds + htd.addFamily(hcd); + + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS); + + HRegion region = HBaseTestingUtility.createRegionAndWAL(new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY), + TEST_UTIL.getDataTestDir(), conf, htd); + assertNotNull(region); + try { + long now = EnvironmentEdgeManager.currentTime(); + // Add a cell that will expire in 5 seconds via cell TTL + region.put(new Put(row).add(new KeyValue(row, fam1, q1, now, + HConstants.EMPTY_BYTE_ARRAY, new Tag[] { + // TTL tags specify ts in milliseconds + new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(5000L)) } ))); + // Add a cell that will expire after 10 seconds via family setting + region.put(new Put(row).add(fam1, q2, now, HConstants.EMPTY_BYTE_ARRAY)); + // Add a cell that will expire in 15 seconds via cell TTL + region.put(new Put(row).add(new KeyValue(row, fam1, q3, now + 10000 - 1, + HConstants.EMPTY_BYTE_ARRAY, new Tag[] { + // TTL tags specify ts in milliseconds + new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(5000L)) } ))); + // Add a cell that will expire in 20 seconds via family setting + region.put(new Put(row).add(fam1, q4, now + 10000 - 1, HConstants.EMPTY_BYTE_ARRAY)); + + // Flush so we are sure store scanning gets this right + region.flush(true); + + // A query at time T+0 should return all cells + Result r = region.get(new Get(row)); + assertNotNull(r.getValue(fam1, q1)); + assertNotNull(r.getValue(fam1, q2)); + assertNotNull(r.getValue(fam1, q3)); + assertNotNull(r.getValue(fam1, q4)); + + // Increment time to T+5 seconds + edge.incrementTime(5000); + + r = region.get(new Get(row)); + assertNull(r.getValue(fam1, q1)); + assertNotNull(r.getValue(fam1, q2)); + assertNotNull(r.getValue(fam1, q3)); + assertNotNull(r.getValue(fam1, q4)); + + // Increment time to T+10 seconds + edge.incrementTime(5000); + + r = region.get(new 
Get(row)); + assertNull(r.getValue(fam1, q1)); + assertNull(r.getValue(fam1, q2)); + assertNotNull(r.getValue(fam1, q3)); + assertNotNull(r.getValue(fam1, q4)); + + // Increment time to T+15 seconds + edge.incrementTime(5000); + + r = region.get(new Get(row)); + assertNull(r.getValue(fam1, q1)); + assertNull(r.getValue(fam1, q2)); + assertNull(r.getValue(fam1, q3)); + assertNotNull(r.getValue(fam1, q4)); + + // Increment time to T+20 seconds + edge.incrementTime(10000); + + r = region.get(new Get(row)); + assertNull(r.getValue(fam1, q1)); + assertNull(r.getValue(fam1, q2)); + assertNull(r.getValue(fam1, q3)); + assertNull(r.getValue(fam1, q4)); + + // Fun with disappearing increments + + // Start at 1 + region.put(new Put(row).add(fam1, q1, Bytes.toBytes(1L))); + r = region.get(new Get(row)); + byte[] val = r.getValue(fam1, q1); + assertNotNull(val); + assertEquals(Bytes.toLong(val), 1L); + + // Increment with a TTL of 5 seconds + Increment incr = new Increment(row).addColumn(fam1, q1, 1L); + incr.setTTL(5000); + region.increment(incr); // 2 + + // New value should be 2 + r = region.get(new Get(row)); + val = r.getValue(fam1, q1); + assertNotNull(val); + assertEquals(Bytes.toLong(val), 2L); + + // Increment time to T+25 seconds + edge.incrementTime(5000); + + // Value should be back to 1 + r = region.get(new Get(row)); + val = r.getValue(fam1, q1); + assertNotNull(val); + assertEquals(Bytes.toLong(val), 1L); + + // Increment time to T+30 seconds + edge.incrementTime(5000); + + // Original value written at T+20 should be gone now via family TTL + r = region.get(new Get(row)); + assertNull(r.getValue(fam1, q1)); + + } finally { + HBaseTestingUtility.closeRegionAndWAL(region); + } + } + + static HRegion initHRegion(TableName tableName, String callingMethod, + byte[]... 
families) throws IOException { + return initHRegion(tableName, callingMethod, HBaseConfiguration.create(), + families); + } +} + diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java index f4d9dac..3d8ad06 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java @@ -125,11 +125,11 @@ public class TestHeapMemoryManager { final ChoreService choreService = new ChoreService("TEST_SERVER_NAME"); heapMemoryManager.start(choreService); memStoreFlusher.flushType = FlushType.ABOVE_HIGHER_MARK; - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); memStoreFlusher.flushType = FlushType.ABOVE_LOWER_MARK; - memStoreFlusher.requestFlush(null, false); + memStoreFlusher.requestFlush(null, false, false); Thread.sleep(1500); // Allow the tuner to run once and do necessary memory up // No changes should be made by tuner as we already have lot of empty space assertEquals(oldMemstoreHeapSize, memStoreFlusher.memstoreSize); @@ -190,10 +190,10 @@ public class TestHeapMemoryManager { final ChoreService choreService = new ChoreService("TEST_SERVER_NAME"); heapMemoryManager.start(choreService); memStoreFlusher.flushType = FlushType.ABOVE_LOWER_MARK; - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); Thread.sleep(1500); // Allow the tuner to run once and do necessary memory up assertHeapSpaceDelta(DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE, oldMemstoreHeapSize, memStoreFlusher.memstoreSize); @@ -203,8 +203,8 @@ public class TestHeapMemoryManager { oldBlockCacheSize = blockCache.maxSize; // Do some more flushes before the next run of HeapMemoryTuner memStoreFlusher.flushType = FlushType.ABOVE_LOWER_MARK; - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); Thread.sleep(1500); assertHeapSpaceDelta(DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE, oldMemstoreHeapSize, memStoreFlusher.memstoreSize); @@ -277,9 +277,9 @@ public class TestHeapMemoryManager { final ChoreService choreService = new ChoreService("TEST_SERVER_NAME"); heapMemoryManager.start(choreService); memStoreFlusher.flushType = FlushType.ABOVE_LOWER_MARK; - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); blockCache.evictBlock(null); Thread.sleep(1500); // Allow the tuner to run once and do necessary memory up // No changes should happen as there is undefined increase in flushes and evictions @@ -287,9 +287,9 @@ public class 
TestHeapMemoryManager { assertEquals(oldBlockCacheSize, blockCache.maxSize); // Do some more flushes before the next run of HeapMemoryTuner memStoreFlusher.flushType = FlushType.ABOVE_LOWER_MARK; - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); Thread.sleep(1500); assertHeapSpaceDelta(DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE, oldMemstoreHeapSize, memStoreFlusher.memstoreSize); @@ -321,9 +321,9 @@ public class TestHeapMemoryManager { final ChoreService choreService = new ChoreService("TEST_SERVER_NAME"); heapMemoryManager.start(choreService); memStoreFlusher.flushType = FlushType.ABOVE_LOWER_MARK; - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); - memStoreFlusher.requestFlush(null, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); + memStoreFlusher.requestFlush(null, false, false); blockCache.evictBlock(null); blockCache.evictBlock(null); Thread.sleep(1500); // Allow the tuner to run once and do necessary memory up @@ -332,7 +332,7 @@ public class TestHeapMemoryManager { assertEquals(oldBlockCacheSize, blockCache.maxSize); // Flushes that block updates memStoreFlusher.flushType = FlushType.ABOVE_HIGHER_MARK; - memStoreFlusher.requestFlush(null, false); + memStoreFlusher.requestFlush(null, false, false); blockCache.evictBlock(null); blockCache.evictBlock(null); blockCache.evictBlock(null); @@ -601,7 +601,7 @@ public class TestHeapMemoryManager { } @Override - public void requestFlush(Region region, boolean forceFlushAllStores) { + public void requestFlush(Region region, boolean forceFlushAllStores, boolean forceCompacted) { this.listener.flushRequested(flushType, region); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreChunkPool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreChunkPool.java index 80333e8..1bf18f4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreChunkPool.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreChunkPool.java @@ -18,12 +18,6 @@ */ package org.apache.hadoop.hbase.regionserver; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.util.List; -import java.util.Random; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.testclassification.RegionServerTests; @@ -36,6 +30,13 @@ import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; +import java.io.IOException; +import java.util.List; +import java.util.Random; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + /** * Test the {@link MemStoreChunkPool} class */ @@ -47,7 +48,7 @@ public class TestMemStoreChunkPool { @BeforeClass public static void setUpBeforeClass() throws Exception { - conf.setBoolean(DefaultMemStore.USEMSLAB_KEY, true); + conf.setBoolean(StoreSegmentFactory.USEMSLAB_KEY, true); conf.setFloat(MemStoreChunkPool.CHUNK_POOL_MAXSIZE_KEY, 0.2f); chunkPoolDisabledBeforeTest = MemStoreChunkPool.chunkPoolDisabled; MemStoreChunkPool.chunkPoolDisabled = false; @@ -115,14 +116,14 @@ public class TestMemStoreChunkPool 
{ memstore.add(new KeyValue(row, fam, qf3, val)); // Creating a snapshot - MemStoreSnapshot snapshot = memstore.snapshot(); - assertEquals(3, memstore.snapshot.size()); + MemStoreSnapshot snapshot = memstore.snapshot(0); + assertEquals(3, memstore.getSnapshot().getCellsCount()); // Adding value to "new" memstore - assertEquals(0, memstore.cellSet.size()); + assertEquals(0, memstore.getActive().getCellsCount()); memstore.add(new KeyValue(row, fam, qf4, val)); memstore.add(new KeyValue(row, fam, qf5, val)); - assertEquals(2, memstore.cellSet.size()); + assertEquals(2, memstore.getActive().getCellsCount()); memstore.clearSnapshot(snapshot.getId()); int chunkCount = chunkPool.getPoolSize(); @@ -132,7 +133,7 @@ public class TestMemStoreChunkPool { @Test public void testPuttingBackChunksWithOpeningScanner() - throws UnexpectedStateException { + throws IOException { byte[] row = Bytes.toBytes("testrow"); byte[] fam = Bytes.toBytes("testfamily"); byte[] qf1 = Bytes.toBytes("testqualifier1"); @@ -152,14 +153,14 @@ public class TestMemStoreChunkPool { memstore.add(new KeyValue(row, fam, qf3, val)); // Creating a snapshot - MemStoreSnapshot snapshot = memstore.snapshot(); - assertEquals(3, memstore.snapshot.size()); + MemStoreSnapshot snapshot = memstore.snapshot(0); + assertEquals(3, memstore.getSnapshot().getCellsCount()); // Adding value to "new" memstore - assertEquals(0, memstore.cellSet.size()); + assertEquals(0, memstore.getActive().getCellsCount()); memstore.add(new KeyValue(row, fam, qf4, val)); memstore.add(new KeyValue(row, fam, qf5, val)); - assertEquals(2, memstore.cellSet.size()); + assertEquals(2, memstore.getActive().getCellsCount()); // opening scanner before clear the snapshot List scanners = memstore.getScanners(0); @@ -179,7 +180,7 @@ public class TestMemStoreChunkPool { chunkPool.clearChunks(); // Creating another snapshot - snapshot = memstore.snapshot(); + snapshot = memstore.snapshot(0); // Adding more value memstore.add(new KeyValue(row, fam, qf6, val)); memstore.add(new KeyValue(row, fam, qf7, val)); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java index c520422..ff0f1b8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java @@ -18,17 +18,7 @@ */ package org.apache.hadoop.hbase.regionserver; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.NavigableSet; - +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; @@ -63,7 +53,16 @@ import org.apache.hadoop.hbase.util.Pair; import org.junit.Test; import org.junit.experimental.categories.Category; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; /** * Test cases against ReversibleKeyValueScanner */ @@ 
-673,7 +672,7 @@ public class TestReversibleScanners { } } } - memstore.snapshot(); + memstore.snapshot(0); // Add another half of the keyvalues to snapshot for (int i = 0; i < ROWSIZE; i++) { for (int j = 0; j < QUALSIZE; j++) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitWalDataLoss.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitWalDataLoss.java index 92e0558..393e1de 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitWalDataLoss.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitWalDataLoss.java @@ -123,7 +123,7 @@ public class TestSplitWalDataLoss { long oldestSeqIdOfStore = region.getOldestSeqIdOfStore(family); Log.info("CHANGE OLDEST " + oldestSeqIdOfStore); assertTrue(oldestSeqIdOfStore > HConstants.NO_SEQNUM); - rs.cacheFlusher.requestFlush(spiedRegion, false); + rs.cacheFlusher.requestFlush(spiedRegion, false, false); synchronized (flushed) { while (!flushed.booleanValue()) { flushed.wait(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java index 6b669a0..1c288b0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java @@ -19,25 +19,7 @@ package org.apache.hadoop.hbase.regionserver; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; - -import java.io.IOException; -import java.lang.ref.SoftReference; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableSet; -import java.util.concurrent.ConcurrentSkipListSet; -import java.util.concurrent.atomic.AtomicBoolean; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -73,8 +55,6 @@ import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration; import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController; -import org.apache.hadoop.hbase.wal.DefaultWALProvider; -import org.apache.hadoop.hbase.wal.WALFactory; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; @@ -82,6 +62,8 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.IncrementingEnvironmentEdge; import org.apache.hadoop.hbase.util.ManualEnvironmentEdge; +import org.apache.hadoop.hbase.wal.DefaultWALProvider; +import org.apache.hadoop.hbase.wal.WALFactory; import org.apache.hadoop.util.Progressable; import org.junit.After; import org.junit.Assert; @@ -92,7 +74,21 @@ import org.junit.experimental.categories.Category; import org.junit.rules.TestName; import org.mockito.Mockito; -import com.google.common.collect.Lists; +import java.io.IOException; 
+import java.lang.ref.SoftReference; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableSet; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.junit.Assert.*; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.*; /** * Test class for the Store @@ -555,7 +551,7 @@ public class TestStore { this.store.snapshot(); flushStore(store, id++); Assert.assertEquals(storeFilessize, this.store.getStorefiles().size()); - Assert.assertEquals(0, ((DefaultMemStore)this.store.memstore).cellSet.size()); + Assert.assertEquals(0, ((DefaultMemStore)this.store.memstore).getActive().getCellsCount()); } private void assertCheck() { @@ -600,7 +596,7 @@ public class TestStore { flushStore(store, id++); Assert.assertEquals(1, this.store.getStorefiles().size()); // from the one we inserted up there, and a new one - Assert.assertEquals(2, ((DefaultMemStore)this.store.memstore).cellSet.size()); + Assert.assertEquals(2, ((DefaultMemStore)this.store.memstore).getActive().getCellsCount()); // how many key/values for this row are there? Get get = new Get(row); @@ -674,7 +670,7 @@ public class TestStore { } long computedSize=0; - for (Cell cell : ((DefaultMemStore)this.store.memstore).cellSet) { + for (Cell cell : ((DefaultMemStore)this.store.memstore).getActive().getCellSet()) { long kvsize = DefaultMemStore.heapSizeChange(cell, true); //System.out.println(kv + " size= " + kvsize + " kvsize= " + kv.heapSize()); computedSize += kvsize; @@ -706,7 +702,7 @@ public class TestStore { // then flush. flushStore(store, id++); Assert.assertEquals(1, this.store.getStorefiles().size()); - Assert.assertEquals(1, ((DefaultMemStore)this.store.memstore).cellSet.size()); + Assert.assertEquals(1, ((DefaultMemStore)this.store.memstore).getActive().getCellsCount()); // now increment again: newValue += 1; @@ -865,7 +861,7 @@ public class TestStore { private static void flushStore(HStore store, long id) throws IOException { StoreFlushContext storeFlushCtx = store.createFlushContext(id); - storeFlushCtx.prepare(); + storeFlushCtx.prepareFlushToDisk(id); storeFlushCtx.flushCache(Mockito.mock(MonitoredTask.class)); storeFlushCtx.commit(Mockito.mock(MonitoredTask.class)); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWalAndCompactedMemstoreFlush.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWalAndCompactedMemstoreFlush.java new file mode 100644 index 0000000..b1cf9a0 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWalAndCompactedMemstoreFlush.java @@ -0,0 +1,505 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.regionserver.wal.FSHLog; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.wal.WAL; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * This test verifies the correctness of the Per Column Family flushing strategy + * when part of the memstores are compacted memstores + */ +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestWalAndCompactedMemstoreFlush { + + private static final Log LOG = LogFactory.getLog(TestWalAndCompactedMemstoreFlush.class); + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final Path DIR = TEST_UTIL.getDataTestDir("TestHRegion"); + public static final TableName TABLENAME = TableName.valueOf("TestWalAndCompactedMemstoreFlush", "t1"); + + public static final byte[][] FAMILIES = { Bytes.toBytes("f1"), Bytes.toBytes("f2"), + Bytes.toBytes("f3"), Bytes.toBytes("f4"), Bytes.toBytes("f5") }; + + public static final byte[] FAMILY1 = FAMILIES[0]; + public static final byte[] FAMILY2 = FAMILIES[1]; + public static final byte[] FAMILY3 = FAMILIES[2]; + + + + private HRegion initHRegion(String callingMethod, Configuration conf) throws IOException { + int i=0; + HTableDescriptor htd = new HTableDescriptor(TABLENAME); + for (byte[] family : FAMILIES) { + HColumnDescriptor hcd = new HColumnDescriptor(family); + if(i%2 == 0) hcd.setInMemory(true); // even column families are going to be in-memory + htd.addFamily(hcd); + i++; + } + + HRegionInfo info = new HRegionInfo(TABLENAME, null, null, false); + Path path = new Path(DIR, callingMethod); + return HBaseTestingUtility.createRegionAndWAL(info, path, conf, htd); + } + + + + // A helper function to create puts. + private Put createPut(int familyNum, int putNum) { + byte[] qf = Bytes.toBytes("q" + familyNum); + byte[] row = Bytes.toBytes("row" + familyNum + "-" + putNum); + byte[] val = Bytes.toBytes("val" + familyNum + "-" + putNum); + Put p = new Put(row); + p.addColumn(FAMILIES[familyNum - 1], qf, val); + return p; + } + + + // A helper function to create double puts, so something can be compacted later. 
+ private Put createDoublePut(int familyNum, int putNum) { + byte[] qf = Bytes.toBytes("q" + familyNum); + byte[] row = Bytes.toBytes("row" + familyNum + "-" + putNum); + byte[] val = Bytes.toBytes("val" + familyNum + "-" + putNum); + Put p = new Put(row); + // add twice with different timestamps + p.addColumn(FAMILIES[familyNum - 1], qf, 10, val); + p.addColumn(FAMILIES[familyNum - 1], qf, 20, val); + return p; + } + + + // A helper function to create gets. + private Get createGet(int familyNum, int putNum) { + byte[] row = Bytes.toBytes("row" + familyNum + "-" + putNum); + return new Get(row); + } + + + + + // A helper function to verify edits. + void verifyEdit(int familyNum, int putNum, Table table) throws IOException { + Result r = table.get(createGet(familyNum, putNum)); + byte[] family = FAMILIES[familyNum - 1]; + byte[] qf = Bytes.toBytes("q" + familyNum); + byte[] val = Bytes.toBytes("val" + familyNum + "-" + putNum); + assertNotNull(("Missing Put#" + putNum + " for CF# " + familyNum), r.getFamilyMap(family)); + assertNotNull(("Missing Put#" + putNum + " for CF# " + familyNum), + r.getFamilyMap(family).get(qf)); + assertTrue(("Incorrect value for Put#" + putNum + " for CF# " + familyNum), + Arrays.equals(r.getFamilyMap(family).get(qf), val)); + } + + + + + + @Test(timeout = 180000) + public void testSelectiveFlushWhenEnabled() throws IOException { + + // Set up the configuration + Configuration conf = HBaseConfiguration.create(); + conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 200 * 1024); + conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, FlushLargeStoresPolicy.class.getName()); + conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND, 100 * 1024); + + // Initialize the region + Region region = initHRegion("testSelectiveFlushWhenEnabled", conf); + + // Add 1200 entries for CF1, 100 for CF2 and 50 for CF3 + for (int i = 1; i <= 1200; i++) { + region.put(createPut(1, i)); // compacted memstore + + if (i <= 100) { + region.put(createPut(2, i)); + if (i <= 50) { + region.put(createDoublePut(3, i)); // subject for in-memory compaction + } + } + } + + long totalMemstoreSize = region.getMemstoreSize(); + + // Find the smallest LSNs for edits wrt to each CF. + long smallestSeqCF1PhaseI = region.getOldestSeqIdOfStore(FAMILY1); + long smallestSeqCF2PhaseI = region.getOldestSeqIdOfStore(FAMILY2); + long smallestSeqCF3PhaseI = region.getOldestSeqIdOfStore(FAMILY3); + + // Find the sizes of the memstores of each CF. + long cf1MemstoreSizePhaseI = region.getStore(FAMILY1).getMemStoreSize(); + long cf2MemstoreSizePhaseI = region.getStore(FAMILY2).getMemStoreSize(); + long cf3MemstoreSizePhaseI = region.getStore(FAMILY3).getMemStoreSize(); + + // Get the overall smallest LSN in the region's memstores. + long smallestSeqInRegionCurrentMemstorePhaseI = getWAL(region) + .getEarliestMemstoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes()); + + String s = "\n\n----------------------------------\n" + + "Upon initial insert and before any flush, size of CF1 is:" + + cf1MemstoreSizePhaseI + ", is CF1 compacted memstore?:" + + region.getStore(FAMILY1).isCompactedMemStore() + ". Size of CF2 is:" + + cf2MemstoreSizePhaseI + ", is CF2 compacted memstore?:" + + region.getStore(FAMILY2).isCompactedMemStore() + ". 
Size of CF3 is:" + + cf3MemstoreSizePhaseI + ", is CF3 compacted memstore?:" + + region.getStore(FAMILY3).isCompactedMemStore() + "\n"; + + // The overall smallest LSN in the region's memstores should be the same as + // the LSN of the smallest edit in CF1 + assertEquals(smallestSeqCF1PhaseI, smallestSeqInRegionCurrentMemstorePhaseI); + + // Some other sanity checks. + assertTrue(smallestSeqCF1PhaseI < smallestSeqCF2PhaseI); + assertTrue(smallestSeqCF2PhaseI < smallestSeqCF3PhaseI); + assertTrue(cf1MemstoreSizePhaseI > 0); + assertTrue(cf2MemstoreSizePhaseI > 0); + assertTrue(cf3MemstoreSizePhaseI > 0); + + // The total memstore size should be the same as the sum of the sizes of + // memstores of CF1, CF2 and CF3. + assertEquals(totalMemstoreSize + 3 * DefaultMemStore.DEEP_OVERHEAD, cf1MemstoreSizePhaseI + + cf2MemstoreSizePhaseI + cf3MemstoreSizePhaseI); + + // Flush!!!!!!!!!!!!!!!!!!!!!! + // We have big compacted memstore CF1 and two small memstores: + // CF2 (not compacted) and CF3 (compacted) + // All together they are above the flush size lower bound. + // Since CF1 and CF3 should be flushed to memory (not to disk), + // CF2 is going to be flushed to disk. + // CF1 - nothing to compact, CF3 - should be twice compacted + region.flush(false,false); + + // CF3 should be compacted so wait here to be sure the compaction is done + while (region.getStore(FAMILY3).isMemStoreInCompaction()) Threads.sleep(10); + + // Recalculate everything + long cf1MemstoreSizePhaseII = region.getStore(FAMILY1).getMemStoreSize(); + long cf2MemstoreSizePhaseII = region.getStore(FAMILY2).getMemStoreSize(); + long cf3MemstoreSizePhaseII = region.getStore(FAMILY3).getMemStoreSize(); + + long smallestSeqInRegionCurrentMemstorePhaseII = getWAL(region) + .getEarliestMemstoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes()); + // Find the smallest LSNs for edits wrt to each CF. + long smallestSeqCF1PhaseII = region.getOldestSeqIdOfStore(FAMILY1); + long smallestSeqCF2PhaseII = region.getOldestSeqIdOfStore(FAMILY2); + long smallestSeqCF3PhaseII = region.getOldestSeqIdOfStore(FAMILY3); + + s = s + "DefaultMemStore DEEP_OVERHEAD is:" + DefaultMemStore.DEEP_OVERHEAD + + ", CompactedMemStore DEEP_OVERHEAD is:" + CompactedMemStore.DEEP_OVERHEAD + + ", CompactedMemStore DEEP_OVERHEAD_PER_PIPELINE_ITEM is:" + CompactedMemStore.DEEP_OVERHEAD_PER_PIPELINE_ITEM + + "\n----After first flush! CF1 should be flushed to memory, but not compacted.---\n" + + "Size of CF1 is:" + cf1MemstoreSizePhaseII + ", size of CF2 is:" + cf2MemstoreSizePhaseII + + ", size of CF3 is:" + cf3MemstoreSizePhaseII + "\n"; + + // CF1 was flushed to memory, but there is nothing to compact, should + // remain the same size plus renewed empty skip-list + assertEquals(cf1MemstoreSizePhaseII, + cf1MemstoreSizePhaseI + CompactedMemStore.DEEP_OVERHEAD_PER_PIPELINE_ITEM); + + // CF2 should become empty + assertEquals(DefaultMemStore.DEEP_OVERHEAD, cf2MemstoreSizePhaseII); + + // verify that CF3 was flushed to memory and was compacted (this is aproximation check) + assertTrue(cf3MemstoreSizePhaseI/2+DefaultMemStore.DEEP_OVERHEAD > cf3MemstoreSizePhaseII); + assertTrue(cf3MemstoreSizePhaseI/2 < cf3MemstoreSizePhaseII); + + + // Now the smallest LSN in the region should be the same as the smallest + // LSN in the memstore of CF1. 
+ assertEquals(smallestSeqInRegionCurrentMemstorePhaseII, smallestSeqCF1PhaseI); + + // Now add more puts for CF1, so that we also flush CF1 to disk instead of + // memory in next flush + for (int i = 1200; i < 3600; i++) { + region.put(createPut(1, i)); + } + + s = s + "The smallest sequence in region WAL is: " + smallestSeqInRegionCurrentMemstorePhaseII + + ", the smallest sequence in CF1:" + smallestSeqCF1PhaseII + ", the smallest sequence in CF2:" + + smallestSeqCF2PhaseII +", the smallest sequence in CF3:" + smallestSeqCF3PhaseII + "\n"; + + // How much does the CF1 memstore occupy? Will be used later. + long cf1MemstoreSizePhaseIII = region.getStore(FAMILY1).getMemStoreSize(); + long smallestSeqCF1PhaseIII = region.getOldestSeqIdOfStore(FAMILY1); + + s = s + "----After more puts into CF1 its size is:" + cf1MemstoreSizePhaseIII + + ", and its sequence is:" + smallestSeqCF1PhaseIII + " ----\n" ; + + + // Flush!!!!!!!!!!!!!!!!!!!!!! + // Flush again, CF1 is flushed to memory and its pipeline element is flushed to disk + // CF2 is flushed to disk, because it is not in-memory compacted memstore + // CF3 is flushed empty to memory (actually nothing happens to CF3) + region.flush(false, false); + + // Recalculate everything + long cf1MemstoreSizePhaseIV = region.getStore(FAMILY1).getMemStoreSize(); + long cf2MemstoreSizePhaseIV = region.getStore(FAMILY2).getMemStoreSize(); + long cf3MemstoreSizePhaseIV = region.getStore(FAMILY3).getMemStoreSize(); + + long smallestSeqInRegionCurrentMemstorePhaseIV = getWAL(region) + .getEarliestMemstoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes()); + long smallestSeqCF1PhaseIV = region.getOldestSeqIdOfStore(FAMILY1); + long smallestSeqCF2PhaseIV = region.getOldestSeqIdOfStore(FAMILY2); + long smallestSeqCF3PhaseIV = region.getOldestSeqIdOfStore(FAMILY3); + + s = s + "----After SECOND FLUSH, CF1 size is:" + cf1MemstoreSizePhaseIV + ", CF2 size is:" + + cf2MemstoreSizePhaseIV + " and CF3 size is:" + cf3MemstoreSizePhaseIV + + "\n"; + + s = s + "The smallest sequence in region WAL is: " + smallestSeqInRegionCurrentMemstorePhaseIV + + ", the smallest sequence in CF1:" + smallestSeqCF1PhaseIV + ", the smallest sequence in CF2:" + + smallestSeqCF2PhaseIV +", the smallest sequence in CF3:" + smallestSeqCF3PhaseIV + + "\n"; + + // CF1's pipeline component (inserted before first flush) should be flushed to disk + // and previous active set flushed to the memory. CF2 should be flushed to disk + assertEquals(cf1MemstoreSizePhaseIII - cf1MemstoreSizePhaseI + DefaultMemStore.DEEP_OVERHEAD, + cf1MemstoreSizePhaseIV); + assertEquals(DefaultMemStore.DEEP_OVERHEAD, cf2MemstoreSizePhaseIV); + + // CF3 shouldn't have been touched. + assertEquals(cf3MemstoreSizePhaseIV, cf3MemstoreSizePhaseII); + + // the smallest LSN of CF3 shouldn't change + assertEquals(smallestSeqCF3PhaseII, smallestSeqCF3PhaseIV); + + // CF1 or CF3 should be bottleneck for WAL + assertEquals(s, smallestSeqInRegionCurrentMemstorePhaseIV, + ((smallestSeqCF1PhaseIV 0); + assertTrue(cf2MemstoreSizePhaseI > 0); + assertTrue(cf3MemstoreSizePhaseI > 0); + + // The total memstore size should be the same as the sum of the sizes of + // memstores of CF1, CF2 and CF3. + assertEquals(totalMemstoreSize + 3 * DefaultMemStore.DEEP_OVERHEAD, + cf1MemstoreSizePhaseI + cf2MemstoreSizePhaseI + cf3MemstoreSizePhaseI); + + // Flush! 
+ region.flush(false, false); + + long cf2MemstoreSizePhaseII = region.getStore(FAMILY2).getMemStoreSize(); + + long smallestSeqInRegionCurrentMemstorePhaseII = + region.getWAL().getEarliestMemstoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes()); + long smallestSeqCF1PhaseII = region.getOldestSeqIdOfStore(FAMILY1); + long smallestSeqCF2PhaseII = region.getOldestSeqIdOfStore(FAMILY2); + long smallestSeqCF3PhaseII = region.getOldestSeqIdOfStore(FAMILY3); + + // CF2 should have been cleared + assertEquals(DefaultMemStore.DEEP_OVERHEAD, cf2MemstoreSizePhaseII); + + String s = "\n\n----------------------------------\n" + + "Upon initial insert and flush, LSN of CF1 is:" + + smallestSeqCF1PhaseII + ". LSN of CF2 is:" + + smallestSeqCF2PhaseII + ". LSN of CF3 is:" + + smallestSeqCF3PhaseII + ", smallestSeqInRegionCurrentMemstore:" + + smallestSeqInRegionCurrentMemstorePhaseII + "\n"; + + // Add same entries to compact them later + for (int i = 1; i <= 1200; i++) { + region.put(createPut(1, i)); + if (i <= 100) { + region.put(createPut(2, i)); + if (i <= 50) { + region.put(createPut(3, i)); + } + } + } + + long smallestSeqInRegionCurrentMemstorePhaseIII = + region.getWAL().getEarliestMemstoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes()); + long smallestSeqCF1PhaseIII = region.getOldestSeqIdOfStore(FAMILY1); + long smallestSeqCF2PhaseIII = region.getOldestSeqIdOfStore(FAMILY2); + long smallestSeqCF3PhaseIII = region.getOldestSeqIdOfStore(FAMILY3); + + s = s + "The smallest sequence in region WAL is: " + smallestSeqInRegionCurrentMemstorePhaseIII + + ", the smallest sequence in CF1:" + smallestSeqCF1PhaseIII + ", the smallest sequence in CF2:" + + smallestSeqCF2PhaseIII +", the smallest sequence in CF3:" + smallestSeqCF3PhaseIII + "\n"; + + // Flush! + region.flush(false, false); + + // CF1 and CF3 should be compacted so wait here to be sure the compaction is done + while (region.getStore(FAMILY1).isMemStoreInCompaction()) Threads.sleep(10); + while (region.getStore(FAMILY3).isMemStoreInCompaction()) Threads.sleep(10); + + long smallestSeqInRegionCurrentMemstorePhaseIV = + region.getWAL().getEarliestMemstoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes()); + long smallestSeqCF1PhaseIV = region.getOldestSeqIdOfStore(FAMILY1); + long smallestSeqCF2PhaseIV = region.getOldestSeqIdOfStore(FAMILY2); + long smallestSeqCF3PhaseIV = region.getOldestSeqIdOfStore(FAMILY3); + + s = s + "The smallest sequence in region WAL is: " + smallestSeqInRegionCurrentMemstorePhaseIV + + ", the smallest sequence in CF1:" + smallestSeqCF1PhaseIV + ", the smallest sequence in CF2:" + + smallestSeqCF2PhaseIV +", the smallest sequence in CF3:" + smallestSeqCF3PhaseIV + "\n"; + + // now check that the LSN of the entire WAL, of CF1 and of CF3 has progressed due to compaction + assertTrue(smallestSeqInRegionCurrentMemstorePhaseIV > smallestSeqInRegionCurrentMemstorePhaseIII); + assertTrue(smallestSeqCF1PhaseIV > smallestSeqCF1PhaseIII); + assertTrue(smallestSeqCF3PhaseIV > smallestSeqCF3PhaseIII); + + HBaseTestingUtility.closeRegionAndWAL(region); + } + + + + + + // Find the (first) region which has the specified name. 
+ private static Pair<Region, HRegionServer> getRegionWithName(TableName tableName) { + MiniHBaseCluster cluster = TEST_UTIL.getMiniHBaseCluster(); + List<JVMClusterUtil.RegionServerThread> rsts = cluster.getRegionServerThreads(); + for (int i = 0; i < cluster.getRegionServerThreads().size(); i++) { + HRegionServer hrs = rsts.get(i).getRegionServer(); + for (Region region : hrs.getOnlineRegions(tableName)) { + return Pair.newPair(region, hrs); + } + } + return null; + } + + + private WAL getWAL(Region region) { + return ((HRegion)region).getWAL(); + } + + private int getNumRolledLogFiles(Region region) { + return ((FSHLog)getWAL(region)).getNumRolledLogFiles(); + } + + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java index 1c97a2d..c013331 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java @@ -89,6 +89,23 @@ import org.junit.experimental.categories.Category; import org.junit.rules.TestName; import org.mockito.Mockito; +import java.io.IOException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.when; + /** * Test replay of edits out of a WAL split. */ @@ -822,12 +839,13 @@ public class TestWALReplay { new HRegion(basedir, newWal, newFS, newConf, hri, htd, null) { @Override protected FlushResult internalFlushcache(final WAL wal, final long myseqid, - final Collection<Store> storesToFlush, MonitoredTask status, + final Collection<Store> storesToFlushToDisk, + Collection<Store> specificStoresToFlushInMemory, MonitoredTask status, boolean writeFlushWalMarker) throws IOException { LOG.info("InternalFlushCache Invoked"); - FlushResult fs = super.internalFlushcache(wal, myseqid, storesToFlush, - Mockito.mock(MonitoredTask.class), writeFlushWalMarker); + FlushResult fs = super.internalFlushcache(wal, myseqid, storesToFlushToDisk, + specificStoresToFlushInMemory, Mockito.mock(MonitoredTask.class), writeFlushWalMarker); flushcount.incrementAndGet(); return fs; } @@ -953,7 +971,7 @@ public class TestWALReplay { private HRegion r; @Override - public void requestFlush(Region region, boolean force) { + public void requestFlush(Region region, boolean force, boolean forceCompacted) { try { r.flush(force); } catch (IOException e) { -- 1.7.10.2 (Apple Git-33)