From c5d93e154fbac242c5ac79b1bb41af073c86fa76 Mon Sep 17 00:00:00 2001 From: eshcar Date: Tue, 11 Aug 2015 15:39:46 +0300 Subject: [PATCH] HBASE-13408 trunk version: not including WAL truncation --- .../procedure/flush/FlushTableSubprocedure.java | 9 +- .../hbase/regionserver/AbstractMemStore.java | 454 +++++++ .../apache/hadoop/hbase/regionserver/CellSet.java | 204 +++ .../hadoop/hbase/regionserver/CellSkipListSet.java | 185 --- .../hbase/regionserver/CompactedMemStore.java | 287 ++++ .../hbase/regionserver/CompactionPipeline.java | 223 +++ .../hadoop/hbase/regionserver/DefaultMemStore.java | 864 +----------- .../apache/hadoop/hbase/regionserver/HRegion.java | 155 ++- .../hadoop/hbase/regionserver/HRegionServer.java | 88 +- .../apache/hadoop/hbase/regionserver/HStore.java | 86 +- .../apache/hadoop/hbase/regionserver/MemStore.java | 9 +- .../hbase/regionserver/MemStoreCompactor.java | 230 ++++ .../hadoop/hbase/regionserver/MemStoreFlusher.java | 6 +- .../hadoop/hbase/regionserver/MemStoreScanner.java | 301 +++++ .../hadoop/hbase/regionserver/MemStoreSegment.java | 361 +++++ .../hbase/regionserver/MemStoreSegmentScanner.java | 421 ++++++ .../hbase/regionserver/MemStoreSnapshot.java | 15 +- .../hadoop/hbase/regionserver/RSRpcServices.java | 36 +- .../apache/hadoop/hbase/regionserver/Region.java | 27 + .../hbase/regionserver/RegionServerAccounting.java | 15 +- .../apache/hadoop/hbase/regionserver/Store.java | 5 + .../hbase/regionserver/VersionedSegmentsList.java | 54 + .../hadoop/hbase/TestGlobalMemStoreSize.java | 16 +- .../org/apache/hadoop/hbase/TestIOFencing.java | 27 +- .../org/apache/hadoop/hbase/io/TestHeapSize.java | 49 +- .../hbase/regionserver/TestCellSkipListSet.java | 4 +- .../hbase/regionserver/TestCompactedMemStore.java | 1416 ++++++++++++++++++++ .../hbase/regionserver/TestDefaultMemStore.java | 96 +- .../hadoop/hbase/regionserver/TestHMobStore.java | 32 +- .../hadoop/hbase/regionserver/TestHRegion.java | 106 +- .../hbase/regionserver/TestMemStoreChunkPool.java | 29 +- .../hadoop/hbase/regionserver/TestStore.java | 48 +- 32 files changed, 4475 insertions(+), 1383 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/AbstractMemStore.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSkipListSet.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactedMemStore.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionPipeline.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreCompactor.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreScanner.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSegment.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSegmentScanner.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/VersionedSegmentsList.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactedMemStore.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/FlushTableSubprocedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/FlushTableSubprocedure.java index 5723919..baa280e 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/FlushTableSubprocedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/flush/FlushTableSubprocedure.java @@ -17,9 +17,6 @@ */ package org.apache.hadoop.hbase.procedure.flush; -import java.util.List; -import java.util.concurrent.Callable; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; @@ -30,6 +27,9 @@ import org.apache.hadoop.hbase.procedure.Subprocedure; import org.apache.hadoop.hbase.procedure.flush.RegionServerFlushTableProcedureManager.FlushTableSubprocedurePool; import org.apache.hadoop.hbase.regionserver.Region; +import java.util.List; +import java.util.concurrent.Callable; + /** * This flush region implementation uses the distributed procedure framework to flush * table regions. @@ -65,7 +65,8 @@ public class FlushTableSubprocedure extends Subprocedure { region.startRegionOperation(); try { LOG.debug("Flush region " + region.toString() + " started..."); - region.flush(true); + boolean forceFlushInsteadOfCompaction = false; + region.flush(true,forceFlushInsteadOfCompaction); } finally { LOG.debug("Closing region operation on " + region); region.closeRegionOperation(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/AbstractMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/AbstractMemStore.java new file mode 100644 index 0000000..138a7ad --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/AbstractMemStore.java @@ -0,0 +1,454 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Pair; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableSet; +import java.util.SortedSet; + +/** + * An abstract class, which implements the behaviour shared by all concrete memstore instances. 
+ */ +@InterfaceAudience.Private +public abstract class AbstractMemStore implements MemStore { + + private final Configuration conf; + private final CellComparator comparator; + + // active segment absorbs write operations + volatile private MemStoreSegment active; + // Snapshot of memstore. Made for flusher. + volatile private MemStoreSegment snapshot; + volatile long snapshotId; + // Used to track when to flush + volatile private long timeOfOldestEdit; + + public final static long FIXED_OVERHEAD = ClassSize.align( + ClassSize.OBJECT + + (4 * ClassSize.REFERENCE) + + (2 * Bytes.SIZEOF_LONG)); + + public final static long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD + + 2 * (ClassSize.ATOMIC_LONG + ClassSize.TIMERANGE_TRACKER + + ClassSize.CELL_SKIPLIST_SET + ClassSize.CONCURRENT_SKIPLISTMAP)); + + + protected AbstractMemStore(final Configuration conf, final CellComparator c) { + this.conf = conf; + this.comparator = c; + resetCellSet(); + this.snapshot = MemStoreSegment.Factory.instance().createMemStoreSegment( + CellSet.Type.EMPTY, conf, c, 0); + + } + + protected void resetCellSet() { + // Reset heap to not include any keys + this.active = MemStoreSegment.Factory.instance().createMemStoreSegment( + CellSet.Type.READ_WRITE, conf, comparator, deepOverhead()); + this.timeOfOldestEdit = Long.MAX_VALUE; + } + + /* + * Calculate how the MemStore size has changed. Includes overhead of the + * backing Map. + * @param cell + * @param notpresent True if the cell was NOT present in the set. + * @return Size + */ + static long heapSizeChange(final Cell cell, final boolean notpresent) { + return notpresent ? ClassSize.align(ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY + + CellUtil.estimatedHeapSizeOf(cell)) : 0; + } + + public abstract boolean shouldSeek(Scan scan, long oldestUnexpiredTS); + + public abstract AbstractMemStore setForceFlush(); + public abstract boolean isMemstoreCompaction(); + +// protected abstract long deepOverhead(); + protected long deepOverhead() { + return DEEP_OVERHEAD; + } + + /** + * Write an update + * @param cell + * @return approximate size of the passed KV & newly added KV which maybe different than the + * passed-in KV + */ + @Override + public Pair add(Cell cell) { + Cell toAdd = maybeCloneWithAllocator(cell); + return new Pair(internalAdd(toAdd), toAdd); + } + + /** + * Update or insert the specified KeyValues. + *
<p>
+ For each KeyValue, insert into MemStore. This will atomically upsert the + value for that row/family/qualifier. If a KeyValue already existed, + it will then be removed. + *
<p>
+ Currently the memstoreTS is kept at 0, so each insert is immediately + visible as it happens. We may want to change this so it is atomic across + all KeyValues. + *
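A minimal caller-side sketch of this contract (assuming a memstore instance and the key components row, family, qualifier, now and newValue in scope; it mirrors the updateColumnValue implementation later in this class):

    List<Cell> cells = new ArrayList<Cell>(1);
    // the new cell carries memstoreTS/sequence id 0, so it is immediately visible
    cells.add(new KeyValue(row, family, qualifier, now, Bytes.toBytes(newValue)));
    // the readpoint bounds which older duplicates may safely be pruned
    long delta = memstore.upsert(cells, smallestReadPoint);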
<p>
+ * This is called under row lock, so Get operations will still see updates + * atomically. Scans will only see each KeyValue update as atomic. + * + * @param cells + * @param readpoint readpoint below which we can safely remove duplicate KVs + * @return change in memstore size + */ + @Override + public long upsert(Iterable cells, long readpoint) { + long size = 0; + for (Cell cell : cells) { + size += upsert(cell, readpoint); + } + return size; + } + + /** + * @return Oldest timestamp of all the Cells in the MemStore + */ + @Override + public long timeOfOldestEdit() { + return timeOfOldestEdit; + } + + + /** + * Write a delete + * @param deleteCell + * @return approximate size of the passed key and value. + */ + @Override + public long delete(Cell deleteCell) { + Cell toAdd = maybeCloneWithAllocator(deleteCell); + long s = internalAdd(toAdd); + return s; + } + + /** + * The passed snapshot was successfully persisted; it can be let go. + * @param id Id of the snapshot to clean out. + * @throws UnexpectedStateException + * @see #snapshot() + */ + @Override + public void clearSnapshot(long id) throws UnexpectedStateException { + if (this.snapshotId != id) { + throw new UnexpectedStateException("Current snapshot id is " + this.snapshotId + ",passed " + + id); + } + // OK. Passed in snapshot is same as current snapshot. If not-empty, + // create a new snapshot and let the old one go. + MemStoreSegment oldSnapshot = this.snapshot; + if (!this.snapshot.isEmpty()) { + this.snapshot = MemStoreSegment.Factory.instance().createMemStoreSegment( + CellSet.Type.EMPTY, getComparator(), 0); + } + this.snapshotId = -1; + oldSnapshot.close(); + } + + /** + * Get the entire heap usage for this MemStore not including keys in the + * snapshot. + */ + @Override + public long heapSize() { + return getActive().getSize(); + } + + /** + * @return scanner on memstore and snapshot in this order. + */ + @Override + public List getScanners(long readPt) throws IOException { + return Collections. singletonList(new MemStoreScanner(this, readPt)); + } + + @Override + public long getSnapshotSize() { + return getSnapshot().getSize(); + } + + protected void rollbackSnapshot(Cell cell) { + // If the key is in the snapshot, delete it. We should not update + // this.size, because that tracks the size of only the memstore and + // not the snapshot. The flush of this snapshot to disk has not + // yet started because Store.flush() waits for all rwcc transactions to + // commit before starting the flush to disk. + snapshot.rollback(cell); + } + + protected void rollbackCellSet(Cell cell) { + // If the key is in the memstore, delete it. Update this.size. + long sz = active.rollback(cell); + if (sz != 0) { + setOldestEditTimeToNow(); + } + } + + + protected void dump(Log log) { + for (Cell cell: this.active.getCellSet()) { + log.info(cell); + } + for (Cell cell: this.snapshot.getCellSet()) { + log.info(cell); + } + } + + + /** + * Inserts the specified KeyValue into MemStore and deletes any existing + * versions of the same row/family/qualifier as the specified KeyValue. + *
<p>
+ First, the specified KeyValue is inserted into the MemStore. + *
<p>
+ * If there are any existing KeyValues in this MemStore with the same row, + * family, and qualifier, they are removed. + *
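The pruning condition applied in the loop below can be summarized as the following predicate (a paraphrase of that code, with cur, readpoint and versionsVisible as in the loop): only Puts that no concurrent scanner can still observe are dropped.

    boolean removable =
        cur.getTypeByte() == KeyValue.Type.Put.getCode() // only Puts are pruned
        && cur.getSequenceId() <= readpoint              // visible below the readpoint
        && versionsVisible >= 1;                         // a newer visible version exists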
<p>
+ * Callers must hold the read lock. + * + * @param cell + * @return change in size of MemStore + */ + private long upsert(Cell cell, long readpoint) { + // Add the Cell to the MemStore + // Use the internalAdd method here since we (a) already have a lock + // and (b) cannot safely use the MSLAB here without potentially + // hitting OOME - see TestMemStore.testUpsertMSLAB for a + // test that triggers the pathological case if we don't avoid MSLAB + // here. + long addedSize = internalAdd(cell); + + // Get the Cells for the row/family/qualifier regardless of timestamp. + // For this case we want to clean up any other puts + Cell firstCell = KeyValueUtil.createFirstOnRow( + cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), + cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); + SortedSet ss = active.tailSet(firstCell); + Iterator it = ss.iterator(); + // versions visible to oldest scanner + int versionsVisible = 0; + while ( it.hasNext() ) { + Cell cur = it.next(); + + if (cell == cur) { + // ignore the one just put in + continue; + } + // check that this is the row and column we are interested in, otherwise bail + if (CellUtil.matchingRow(cell, cur) && CellUtil.matchingQualifier(cell, cur)) { + // only remove Puts that concurrent scanners cannot possibly see + if (cur.getTypeByte() == KeyValue.Type.Put.getCode() && + cur.getSequenceId() <= readpoint) { + if (versionsVisible >= 1) { + // if we get here we have seen at least one version visible to the oldest scanner, + // which means we can prove that no scanner will see this version + + // false means there was a change, so give us the size. + long delta = heapSizeChange(cur, true); + addedSize -= delta; + active.incSize(-delta); + it.remove(); + setOldestEditTimeToNow(); + } else { + versionsVisible++; + } + } + } else { + // past the row or column, done + break; + } + } + return addedSize; + } + + /* + * @param a + * @param b + * @return Return lowest of a or b or null if both a and b are null + */ + protected Cell getLowest(final Cell a, final Cell b) { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + return comparator.compareRows(a, b) <= 0? a: b; + } + + /* + * @param key Find row that follows this one. If null, return first. + * @param map Set to look in for a row beyond row. + * @return Next row or null if none found. If one found, will be a new + * KeyValue -- can be destroyed by subsequent calls to this method. + */ + protected Cell getNextRow(final Cell key, + final NavigableSet set) { + Cell result = null; + SortedSet tail = key == null? set: set.tailSet(key); + // Iterate until we fall into the next row; i.e. move off current row + for (Cell cell: tail) { + if (comparator.compareRows(cell, key) <= 0) + continue; + // Note: Not suppressing deletes or expired cells. Needs to be handled + // by higher up functions. + result = cell; + break; + } + return result; + } + + /** + * Given the specs of a column, update it, first by inserting a new record, + * then removing the old one. Since there is only 1 KeyValue involved, the memstoreTS + * will be set to 0, thus ensuring that they instantly appear to anyone. The underlying + * store will ensure that the insert/delete each are atomic. A scanner/reader will either + * get the new value, or the old value and all readers will eventually only see the new + * value after the old was removed. 
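A sketch of test-style usage (assuming a memstore instance and byte[] column coordinates in scope): the returned timestamp is at least now, bumped past any newer matching Put already present.

    long ts = memstore.updateColumnValue(row, family, qualifier, 42L,
        EnvironmentEdgeManager.currentTime());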
+ * + * @param row + * @param family + * @param qualifier + * @param newValue + * @param now + * @return Timestamp + */ + @VisibleForTesting + @Override + public long updateColumnValue(byte[] row, byte[] family, byte[] qualifier, + long newValue, long now) { + Cell firstCell = KeyValueUtil.createFirstOnRow(row, family, qualifier); + // Is there a Cell in 'snapshot' with the same TS? If so, upgrade the timestamp a bit. + SortedSet snSs = snapshot.tailSet(firstCell); + if (!snSs.isEmpty()) { + Cell snc = snSs.first(); + // is there a matching Cell in the snapshot? + if (CellUtil.matchingRow(snc, firstCell) && CellUtil.matchingQualifier(snc, firstCell)) { + if (snc.getTimestamp() == now) { + // poop, + now += 1; + } + } + } + // logic here: the new ts MUST be at least 'now'. But it could be larger if necessary. + // But the timestamp should also be max(now, mostRecentTsInMemstore) + + // so we cant add the new Cell w/o knowing what's there already, but we also + // want to take this chance to delete some cells. So two loops (sad) + + SortedSet ss = getActive().tailSet(firstCell); + for (Cell cell : ss) { + // if this isnt the row we are interested in, then bail: + if (!CellUtil.matchingColumn(cell, family, qualifier) + || !CellUtil.matchingRow(cell, firstCell)) { + break; // rows dont match, bail. + } + + // if the qualifier matches and it's a put, just RM it out of the active. + if (cell.getTypeByte() == KeyValue.Type.Put.getCode() && + cell.getTimestamp() > now && CellUtil.matchingQualifier(firstCell, cell)) { + now = cell.getTimestamp(); + } + } + + // create or update (upsert) a new Cell with + // 'now' and a 0 memstoreTS == immediately visible + List cells = new ArrayList(1); + cells.add(new KeyValue(row, family, qualifier, now, Bytes.toBytes(newValue))); + return upsert(cells, 1L); + } + + private Cell maybeCloneWithAllocator(Cell cell) { + return active.maybeCloneWithAllocator(cell); + } + + /** + * Internal version of add() that doesn't clone Cells with the + * allocator, and doesn't take the lock. + * + * Callers should ensure they already have the read lock taken + */ + private long internalAdd(final Cell toAdd) { + long s = active.add(toAdd); + setOldestEditTimeToNow(); + return s; + } + + private void setOldestEditTimeToNow() { + if (timeOfOldestEdit == Long.MAX_VALUE) { + timeOfOldestEdit = EnvironmentEdgeManager.currentTime(); + } + } + + protected long keySize() { + return heapSize() - deepOverhead(); + } + + protected CellComparator getComparator() { + return comparator; + } + + protected MemStoreSegment getActive() { + return active; + } + + protected MemStoreSegment getSnapshot() { + return snapshot; + } + + protected void setSnapshot(MemStoreSegment snapshot) { + this.snapshot = snapshot; + } + + protected void setSnapshotSize(long snapshotSize) { + getSnapshot().setSize(snapshotSize); + } + + abstract protected List getListOfScanners(long readPt) throws IOException; + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java new file mode 100644 index 0000000..1689d01 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java @@ -0,0 +1,204 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +import java.util.*; +import java.util.concurrent.ConcurrentNavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; + +/** + * A {@link java.util.Set} of {@link Cell}s implemented on top of a + * {@link java.util.concurrent.ConcurrentSkipListMap}. Works like a + * {@link java.util.concurrent.ConcurrentSkipListSet} in all but one regard: + * An add will overwrite if already an entry for the added key. In other words, + * where CSLS does "Adds the specified element to this set if it is not already + * present.", this implementation "Adds the specified element to this set EVEN + * if it is already present overwriting what was there previous". The call to + * add returns true if no value in the backing map or false if there was an + * entry with same key (though value may be different). + *
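A sketch of the overwrite semantics just described, assuming two cells kv1 and kv2 that compare equal under the set's comparator but carry different values:

    CellSet set = new CellSet(comparator);
    boolean added = set.add(kv1);      // true: no previous entry for this key
    boolean overwrote = !set.add(kv2); // true: equal key existed, value replaced
    assert set.get(kv1) == kv2;        // the mapping now holds the second cell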
<p>
Otherwise, + * has same attributes as ConcurrentSkipListSet: e.g. tolerant of concurrent + * get and set and won't throw ConcurrentModificationException when iterating. + */ +@InterfaceAudience.Private +public class CellSet implements NavigableSet { + /** + * Types of cell set. + * This affects the internal implementation of the cell set objects. + * This allows using different formats for different purposes. + */ + static public enum Type { + READ_WRITE, + EMPTY, + COMPACTED_READ_ONLY, + DEFAULT + } + + private final ConcurrentNavigableMap delegatee; + + CellSet(final CellComparator c) { + this(Type.DEFAULT,c); + } + + CellSet(final Type type, final CellComparator c) { + switch (type) { + case READ_WRITE: + case EMPTY: + case COMPACTED_READ_ONLY: + case DEFAULT: + default: + this.delegatee = new ConcurrentSkipListMap(c); + } + } + + CellSet(final ConcurrentNavigableMap m) { + this.delegatee = m; + } + + public Cell ceiling(Cell e) { + throw new UnsupportedOperationException("Not implemented"); + } + + public Iterator descendingIterator() { + return this.delegatee.descendingMap().values().iterator(); + } + + public NavigableSet descendingSet() { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell floor(Cell e) { + throw new UnsupportedOperationException("Not implemented"); + } + + public SortedSet headSet(final Cell toElement) { + return headSet(toElement, false); + } + + public NavigableSet headSet(final Cell toElement, + boolean inclusive) { + return new CellSet(this.delegatee.headMap(toElement, inclusive)); + } + + public Cell higher(Cell e) { + throw new UnsupportedOperationException("Not implemented"); + } + + public Iterator iterator() { + return this.delegatee.values().iterator(); + } + + public Cell lower(Cell e) { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell pollFirst() { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell pollLast() { + throw new UnsupportedOperationException("Not implemented"); + } + + public SortedSet subSet(Cell fromElement, Cell toElement) { + throw new UnsupportedOperationException("Not implemented"); + } + + public NavigableSet subSet(Cell fromElement, + boolean fromInclusive, Cell toElement, boolean toInclusive) { + throw new UnsupportedOperationException("Not implemented"); + } + + public SortedSet tailSet(Cell fromElement) { + return tailSet(fromElement, true); + } + + public NavigableSet tailSet(Cell fromElement, boolean inclusive) { + return new CellSet(this.delegatee.tailMap(fromElement, inclusive)); + } + + public Comparator comparator() { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell first() { + return this.delegatee.get(this.delegatee.firstKey()); + } + + public Cell last() { + return this.delegatee.get(this.delegatee.lastKey()); + } + + public boolean add(Cell e) { + return this.delegatee.put(e, e) == null; + } + + public boolean addAll(Collection c) { + throw new UnsupportedOperationException("Not implemented"); + } + + public void clear() { + this.delegatee.clear(); + } + + public boolean contains(Object o) { + //noinspection SuspiciousMethodCalls + return this.delegatee.containsKey(o); + } + + public boolean containsAll(Collection c) { + throw new UnsupportedOperationException("Not implemented"); + } + + public boolean isEmpty() { + return this.delegatee.isEmpty(); + } + + public boolean remove(Object o) { + return this.delegatee.remove(o) != null; + } + + public boolean removeAll(Collection c) { + throw new 
UnsupportedOperationException("Not implemented"); + } + + public boolean retainAll(Collection c) { + throw new UnsupportedOperationException("Not implemented"); + } + + public Cell get(Cell kv) { + return this.delegatee.get(kv); + } + + public int size() { + return this.delegatee.size(); + } + + public Object[] toArray() { + throw new UnsupportedOperationException("Not implemented"); + } + + public T[] toArray(T[] a) { + throw new UnsupportedOperationException("Not implemented"); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSkipListSet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSkipListSet.java deleted file mode 100644 index e9941b3..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSkipListSet.java +++ /dev/null @@ -1,185 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.regionserver; - -import java.util.Collection; -import java.util.Comparator; -import java.util.Iterator; -import java.util.NavigableSet; -import java.util.SortedSet; -import java.util.concurrent.ConcurrentNavigableMap; -import java.util.concurrent.ConcurrentSkipListMap; - -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.classification.InterfaceAudience; - -/** - * A {@link java.util.Set} of {@link Cell}s implemented on top of a - * {@link java.util.concurrent.ConcurrentSkipListMap}. Works like a - * {@link java.util.concurrent.ConcurrentSkipListSet} in all but one regard: - * An add will overwrite if already an entry for the added key. In other words, - * where CSLS does "Adds the specified element to this set if it is not already - * present.", this implementation "Adds the specified element to this set EVEN - * if it is already present overwriting what was there previous". The call to - * add returns true if no value in the backing map or false if there was an - * entry with same key (though value may be different). - *
<p>
Otherwise, - * has same attributes as ConcurrentSkipListSet: e.g. tolerant of concurrent - * get and set and won't throw ConcurrentModificationException when iterating. - */ -@InterfaceAudience.Private -public class CellSkipListSet implements NavigableSet { - private final ConcurrentNavigableMap delegatee; - - CellSkipListSet(final CellComparator c) { - this.delegatee = new ConcurrentSkipListMap(c); - } - - CellSkipListSet(final ConcurrentNavigableMap m) { - this.delegatee = m; - } - - public Cell ceiling(Cell e) { - throw new UnsupportedOperationException("Not implemented"); - } - - public Iterator descendingIterator() { - return this.delegatee.descendingMap().values().iterator(); - } - - public NavigableSet descendingSet() { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell floor(Cell e) { - throw new UnsupportedOperationException("Not implemented"); - } - - public SortedSet headSet(final Cell toElement) { - return headSet(toElement, false); - } - - public NavigableSet headSet(final Cell toElement, - boolean inclusive) { - return new CellSkipListSet(this.delegatee.headMap(toElement, inclusive)); - } - - public Cell higher(Cell e) { - throw new UnsupportedOperationException("Not implemented"); - } - - public Iterator iterator() { - return this.delegatee.values().iterator(); - } - - public Cell lower(Cell e) { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell pollFirst() { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell pollLast() { - throw new UnsupportedOperationException("Not implemented"); - } - - public SortedSet subSet(Cell fromElement, Cell toElement) { - throw new UnsupportedOperationException("Not implemented"); - } - - public NavigableSet subSet(Cell fromElement, - boolean fromInclusive, Cell toElement, boolean toInclusive) { - throw new UnsupportedOperationException("Not implemented"); - } - - public SortedSet tailSet(Cell fromElement) { - return tailSet(fromElement, true); - } - - public NavigableSet tailSet(Cell fromElement, boolean inclusive) { - return new CellSkipListSet(this.delegatee.tailMap(fromElement, inclusive)); - } - - public Comparator comparator() { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell first() { - return this.delegatee.get(this.delegatee.firstKey()); - } - - public Cell last() { - return this.delegatee.get(this.delegatee.lastKey()); - } - - public boolean add(Cell e) { - return this.delegatee.put(e, e) == null; - } - - public boolean addAll(Collection c) { - throw new UnsupportedOperationException("Not implemented"); - } - - public void clear() { - this.delegatee.clear(); - } - - public boolean contains(Object o) { - //noinspection SuspiciousMethodCalls - return this.delegatee.containsKey(o); - } - - public boolean containsAll(Collection c) { - throw new UnsupportedOperationException("Not implemented"); - } - - public boolean isEmpty() { - return this.delegatee.isEmpty(); - } - - public boolean remove(Object o) { - return this.delegatee.remove(o) != null; - } - - public boolean removeAll(Collection c) { - throw new UnsupportedOperationException("Not implemented"); - } - - public boolean retainAll(Collection c) { - throw new UnsupportedOperationException("Not implemented"); - } - - public Cell get(Cell kv) { - return this.delegatee.get(kv); - } - - public int size() { - return this.delegatee.size(); - } - - public Object[] toArray() { - throw new UnsupportedOperationException("Not implemented"); - } - - public T[] 
toArray(T[] a) { - throw new UnsupportedOperationException("Not implemented"); - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactedMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactedMemStore.java new file mode 100644 index 0000000..5bc70de --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactedMemStore.java @@ -0,0 +1,287 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; + +import java.io.IOException; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +/** + * A memstore implementation which supports in-memory compaction. + * A compaction pipeline is added between the active set and the snapshot data structures; + * it consists of a list of kv-sets that are subject to compaction. + * The semantics of the prepare-for-flush phase are changed: instead of shifting the current active + * set to snapshot, the active set is pushed into the pipeline. + * Like the snapshot, all pipeline components are read-only; updates only affect the active set. + * To ensure this property we take advantage of the existing blocking mechanism -- the active set + * is pushed to the pipeline while holding updatesLock in exclusive mode. + * + * Periodically, a compaction is applied in the background to all pipeline components resulting + * in a single read-only component. The “old” components are discarded when no scanner is reading + * them. 
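The two prepare-for-flush paths of the snapshot() method below can be paraphrased as the following simplified sketch (names as in this class; logging and error handling elided):

    if (!forceFlush) {
      pipeline.pushHead(active);       // active set joins the pipeline, read-only
      compactor.startCompact(store);   // speculative in-memory compaction
    } else {
      pipeline.pushHead(active);       // forced flush:
      pushTailToSnapshot();            // pipeline tail becomes the flush snapshot
    }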
+ */ +@InterfaceAudience.Private +public class CompactedMemStore extends AbstractMemStore { + + private static final Log LOG = LogFactory.getLog(CompactedMemStore.class); + + private HStore store; + private CompactionPipeline pipeline; + private MemStoreCompactor compactor; + private boolean forceFlush; + + public final static long DEEP_OVERHEAD_PER_PIPELINE_ITEM = ClassSize.align(ClassSize + .TIMERANGE_TRACKER + + ClassSize.CELL_SKIPLIST_SET + ClassSize.CONCURRENT_SKIPLISTMAP); + + public static long getMemStoreSegmentSize(MemStoreSegment segment) { + return segment.getSize() - DEEP_OVERHEAD_PER_PIPELINE_ITEM; + } + + public static long getMemStoreSegmentListSize(LinkedList list) { + long res = 0; + for(MemStoreSegment segment : list) { + res += getMemStoreSegmentSize(segment); + } + return res; + } + + public CompactedMemStore(Configuration conf, CellComparator c, + HStore store) throws IOException { + super(conf, c); + this.store = store; + this.pipeline = new CompactionPipeline(store.getHRegion()); + this.compactor = new MemStoreCompactor(this, pipeline, c, conf); + this.forceFlush = false; + } + + @Override + public boolean shouldSeek(Scan scan, long oldestUnexpiredTS) { + LinkedList list = getMemStoreSegmentList(); + for(MemStoreSegment item : list) { + if(item.shouldSeek(scan, oldestUnexpiredTS)) { + return true; + } + } + return false; + } + + @Override + protected List getListOfScanners(long readPt) throws IOException { + LinkedList pipelineList = pipeline.getCellSetMgrList(); + LinkedList list = new LinkedList(); + list.add(getActive().getScanner(readPt)); + for(MemStoreSegment item : pipelineList) { + list.add(item.getScanner(readPt)); + } + list.add(getSnapshot().getScanner(readPt)); + // set sequence ids by decsending order + Iterator iterator = list.descendingIterator(); + int seqId = 0; + while(iterator.hasNext()){ + iterator.next().setSequenceID(seqId); + seqId++; + } + return list; + } + + /** + * @return Total memory occupied by this MemStore. + * This is not thread safe and the memstore may be changed while computing its size. + * It is the responsibility of the caller to make sure this doesn't happen. + */ + @Override public long size() { + long res = 0; + for(MemStoreSegment item : getMemStoreSegmentList()) { + res += item.getSize(); + } + return res; + } + + /** + * The semantics of the snapshot method are changed to do the following: + * When force-flush flag is on, create a snapshot of the tail of current compaction pipeline + * otherwise, push the current active memstore bucket into the pipeline. + * Snapshot must be cleared by call to {@link #clearSnapshot}. + * {@link #clearSnapshot(long)}. + * + * @return {@link MemStoreSnapshot} + */ + @Override public MemStoreSnapshot snapshot() { + MemStoreSegment active = getActive(); + if(!forceFlush) { + LOG.info("Snapshot called without forcing flush. "); + LOG.info("Pushing active set into compaction pipeline, and initiating compaction."); + pushActiveToPipeline(active); + try { + // Speculative compaction execution, may be interrupted if flush is forced while + // compaction is in progress + compactor.startCompact(store); + } catch (IOException e) { + LOG.error("Unable to run memstore compaction", e); + } + } else { //**** FORCE FLUSH MODE ****// + // If snapshot currently has entries, then flusher failed or didn't call + // cleanup. Log a warning. + if (!getSnapshot().isEmpty()) { + LOG.warn("Snapshot called again without clearing previous. " + + "Doing nothing. 
Another ongoing flush or did we fail last attempt?"); + } else { + LOG.info("FORCE FLUSH MODE: Pushing active set into compaction pipeline, " + + "and pipeline tail into snapshot."); + pushActiveToPipeline(active); + this.snapshotId = EnvironmentEdgeManager.currentTime(); + pushTailToSnapshot(); + resetForceFlush(); + } + } + return new MemStoreSnapshot(this.snapshotId, getSnapshot(), getComparator()); + } + + private void pushActiveToPipeline(MemStoreSegment active) { + if (!active.isEmpty()) { + pipeline.pushHead(active); + active.setSize(active.getSize() - deepOverhead() + DEEP_OVERHEAD_PER_PIPELINE_ITEM); + long size = getMemStoreSegmentSize(active); + resetCellSet(); + updateRegionCounters(size); + } + } + + private void pushTailToSnapshot() { + MemStoreSegment tail = pipeline.pullTail(); + if(!tail.isEmpty()) { + setSnapshot(tail); + long size = getMemStoreSegmentSize(tail); + setSnapshotSize(size); + updateRegionCounters(-size); + } + } + + private void updateRegionCounters(long size) { + if(getRegion() != null) { + long globalMemstoreAdditionalSize = getRegion().addAndGetGlobalMemstoreAdditionalSize(size); + // no need to update global memstore size as it is updated by the flusher + LOG.info(" globalMemstoreAdditionalSize: "+globalMemstoreAdditionalSize); + } + } + + /** + * On flush, how much memory we will clear from the active cell set. + * + * @return size of data that is going to be flushed from active set + */ + @Override + public long getFlushableSize() { + return keySize(); + } + + /** + * Remove n key from the memstore. Only kvs that have the same key and the same memstoreTS are + * removed. It is ok to not update timeRangeTracker in this call. + * + * @param cell + */ + @Override public void rollback(Cell cell) { + rollbackSnapshot(cell); + pipeline.rollback(cell); + rollbackCellSet(cell); + } + + /** + * Find the key that matches row exactly, or the one that immediately precedes it. The + * target row key is set in state. + * +/ * @param state column/delete tracking state + */ +// @Override +// public void getRowKeyAtOrBefore(GetClosestRowBeforeTracker state) { +// getActive().getRowKeyAtOrBefore(state); +// pipeline.getRowKeyAtOrBefore(state); +// getSnapshot().getRowKeyAtOrBefore(state); +// } + + @Override + public AbstractMemStore setForceFlush() { + forceFlush = true; + // stop compactor if currently working, to avoid possible conflict in pipeline + compactor.stopCompact(); + return this; + } + + @Override public boolean isMemstoreCompaction() { + return compactor.isInCompaction(); + } + + private CompactedMemStore resetForceFlush() { + forceFlush = false; + return this; + } + + private LinkedList getMemStoreSegmentList() { + LinkedList pipelineList = pipeline.getCellSetMgrList(); + LinkedList list = new LinkedList(); + list.add(getActive()); + list.addAll(pipelineList); + list.add(getSnapshot()); + return list; + } + + //methods for tests + + /** + * @param cell Find the row that comes after this one. If null, we return the + * first. + * @return Next row or null if none found. 
+ */ + Cell getNextRow(final Cell cell) { + Cell lowest = null; + LinkedList segments = getMemStoreSegmentList(); + for (MemStoreSegment segment : segments) { + if (lowest==null) { + lowest = getNextRow(cell, segment.getCellSet()); + } else { + lowest = getLowest(lowest, getNextRow(cell, segment.getCellSet())); + } + } + return lowest; + } + + void disableCompaction() { + compactor.toggleCompaction(false); + } + void enableCompaction() { + compactor.toggleCompaction(true); + } + + public HRegion getRegion() { + return store.getHRegion(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionPipeline.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionPipeline.java new file mode 100644 index 0000000..0dd02d6 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionPipeline.java @@ -0,0 +1,223 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + * The compaction pipeline of a {@link CompactedMemStore}, is a FIFO queue of cell set buckets. + * It supports pushing a cell set bucket at the head of the pipeline and pulling a bucket from the + * tail to flush to disk. + * It also supports swap operation to allow the compactor swap a subset of the buckets with a new + * (compacted) one. This swap succeeds only if the version number passed with the list of buckets + * to swap is the same as the current version of the pipeline. + * The pipeline version is updated whenever swapping buckets or pulling the bucket at the tail. 
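The compactor's optimistic-concurrency protocol against this pipeline, sketched with a hypothetical doCompaction helper (only getVersionedList and swap are from this class):

    VersionedSegmentsList toCompact = pipeline.getVersionedList();
    MemStoreSegment compacted = doCompaction(toCompact.getMemStoreSegments());
    // swap succeeds only if no pull or swap bumped the version in the meantime
    if (!pipeline.swap(toCompact, compacted)) {
      // a flush raced us; the compacted segment is discarded
    }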
+ */ +@InterfaceAudience.Private +public class CompactionPipeline { + private static final Log LOG = LogFactory.getLog(CompactedMemStore.class); + + private final HRegion region; + private LinkedList pipeline; + private long version; + // a lock to protect critical sections changing the structure of the list + private final Lock lock; + + private static final MemStoreSegment EMPTY_MEM_STORE_SEGMENT = MemStoreSegment.Factory.instance() + .createMemStoreSegment(CellSet.Type.EMPTY, null, + CompactedMemStore.DEEP_OVERHEAD_PER_PIPELINE_ITEM); + + public CompactionPipeline(HRegion region) { + this.region = region; + this.pipeline = new LinkedList(); + this.version = 0; + this.lock = new ReentrantLock(true); + } + + public boolean pushHead(MemStoreSegment segment) { + lock.lock(); + try { + return addFirst(segment); + } finally { + lock.unlock(); + } + } + + public MemStoreSegment pullTail() { + lock.lock(); + try { + if(pipeline.isEmpty()) { + return EMPTY_MEM_STORE_SEGMENT; + } + return removeLast(); + } finally { + lock.unlock(); + } + } + + public VersionedSegmentsList getVersionedList() { + lock.lock(); + try { + LinkedList segmentList = new LinkedList(pipeline); + VersionedSegmentsList res = new VersionedSegmentsList(segmentList, version); + return res; + } finally { + lock.unlock(); + } + } + + /** + * Swaps the versioned list at the tail of the pipeline with the new compacted segment. + * Swapping only if there were no changes to the suffix of the list while it was compacted. + * @param versionedList tail of the pipeline that was compacted + * @param segment new compacted segment + * @return true iff swapped tail with new compacted segment + */ + public boolean swap(VersionedSegmentsList versionedList, MemStoreSegment segment) { + if(versionedList.getVersion() != version) { + return false; + } + lock.lock(); + try { + if(versionedList.getVersion() != version) { + return false; + } + LinkedList suffix = versionedList.getMemStoreSegments(); + boolean valid = validateSuffixList(suffix); + if(!valid) return false; + LOG.info("Swapping pipeline suffix with compacted item."); + swapSuffix(suffix,segment); + if(region != null) { + // update the global memstore size counter + long suffixSize = CompactedMemStore.getMemStoreSegmentListSize(suffix); + long newSize = CompactedMemStore.getMemStoreSegmentSize(segment); + long delta = suffixSize - newSize; + long globalMemstoreAdditionalSize = region.addAndGetGlobalMemstoreAdditionalSize(-delta); + LOG.info("Suffix size: "+ suffixSize+" compacted item size: "+newSize+ + " globalMemstoreAdditionalSize: "+globalMemstoreAdditionalSize); + } + return true; + } finally { + lock.unlock(); + } + } + + public long rollback(Cell cell) { + lock.lock(); + long sz = 0; + try { + if(!pipeline.isEmpty()) { + Iterator pipelineBackwardIterator = pipeline.descendingIterator(); + MemStoreSegment current = pipelineBackwardIterator.next(); + for (; pipelineBackwardIterator.hasNext(); current = pipelineBackwardIterator.next()) { + sz += current.rollback(cell); + } + if(sz != 0) { + incVersion(); + } + } + return sz; + } finally { + lock.unlock(); + } + } + +// public void getRowKeyAtOrBefore(GetClosestRowBeforeTracker state) { +// for(MemStoreSegment item : getCellSetMgrList()) { +// item.getRowKeyAtOrBefore(state); +// } +// } + + public boolean isEmpty() { + return pipeline.isEmpty(); + } + + public LinkedList getCellSetMgrList() { + lock.lock(); + try { + LinkedList res = new LinkedList(pipeline); + return res; + } finally { + lock.unlock(); + } + + } + + public long 
size() { + return pipeline.size(); + } + + private boolean validateSuffixList(LinkedList suffix) { + if(suffix.isEmpty()) { + // empty suffix is always valid + return true; + } + + Iterator pipelineBackwardIterator = pipeline.descendingIterator(); + Iterator suffixBackwardIterator = suffix.descendingIterator(); + MemStoreSegment suffixCurrent; + MemStoreSegment pipelineCurrent; + for( ; suffixBackwardIterator.hasNext(); ) { + if(!pipelineBackwardIterator.hasNext()) { + // a suffix longer than pipeline is invalid + return false; + } + suffixCurrent = suffixBackwardIterator.next(); + pipelineCurrent = pipelineBackwardIterator.next(); + if(suffixCurrent != pipelineCurrent) { + // non-matching suffix + return false; + } + } + // suffix matches pipeline suffix + return true; + } + + private void swapSuffix(LinkedList suffix, MemStoreSegment segment) { + version++; + for(MemStoreSegment itemInSuffix : suffix) { + itemInSuffix.close(); + } + pipeline.removeAll(suffix); + pipeline.addLast(segment); + } + + private MemStoreSegment removeLast() { + version++; + return pipeline.removeLast(); + } + + private boolean addFirst(MemStoreSegment segment) { + pipeline.add(0,segment); + return true; + } + + private void incVersion() { + version++; + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java index cc8c3a8..56f6217 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java @@ -19,35 +19,24 @@ package org.apache.hadoop.hbase.regionserver; -import java.lang.management.ManagementFactory; -import java.lang.management.RuntimeMXBean; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableSet; -import java.util.SortedSet; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.util.ByteRange; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.ClassSize; -import org.apache.hadoop.hbase.util.CollectionBackedScanner; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; -import org.apache.hadoop.hbase.util.ReflectionUtils; -import org.apache.htrace.Trace; + +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.util.ArrayList; +import java.util.List; /** * The MemStore holds in-memory modifications to the Store. Modifications @@ -66,40 +55,8 @@ import org.apache.htrace.Trace; * in KV size. 
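A sketch of the flusher handshake these methods implement (assuming a DefaultMemStore instance, and that the id carried by the returned snapshot is later used to release it):

    MemStoreSnapshot snap = memstore.snapshot(); // active set becomes the snapshot
    // ... the flusher persists the snapshot's scanner to an HFile ...
    memstore.clearSnapshot(snap.getId());        // release memory held by the snapshot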
*/ @InterfaceAudience.Private -public class DefaultMemStore implements MemStore { +public class DefaultMemStore extends AbstractMemStore { private static final Log LOG = LogFactory.getLog(DefaultMemStore.class); - static final String USEMSLAB_KEY = "hbase.hregion.memstore.mslab.enabled"; - private static final boolean USEMSLAB_DEFAULT = true; - static final String MSLAB_CLASS_NAME = "hbase.regionserver.mslab.class"; - - private Configuration conf; - - // MemStore. Use a CellSkipListSet rather than SkipListSet because of the - // better semantics. The Map will overwrite if passed a key it already had - // whereas the Set will not add new Cell if key is same though value might be - // different. Value is not important -- just make sure always same - // reference passed. - volatile CellSkipListSet cellSet; - - // Snapshot of memstore. Made for flusher. - volatile CellSkipListSet snapshot; - - final CellComparator comparator; - - // Used to track own heapSize - final AtomicLong size; - private volatile long snapshotSize; - - // Used to track when to flush - volatile long timeOfOldestEdit = Long.MAX_VALUE; - - TimeRangeTracker timeRangeTracker; - TimeRangeTracker snapshotTimeRangeTracker; - - volatile MemStoreLAB allocator; - volatile MemStoreLAB snapshotAllocator; - volatile long snapshotId; - volatile boolean tagsPresent; /** * Default constructor. Used for tests. @@ -112,32 +69,12 @@ public class DefaultMemStore implements MemStore { * Constructor. * @param c Comparator */ - public DefaultMemStore(final Configuration conf, - final CellComparator c) { - this.conf = conf; - this.comparator = c; - this.cellSet = new CellSkipListSet(c); - this.snapshot = new CellSkipListSet(c); - timeRangeTracker = new TimeRangeTracker(); - snapshotTimeRangeTracker = new TimeRangeTracker(); - this.size = new AtomicLong(DEEP_OVERHEAD); - this.snapshotSize = 0; - if (conf.getBoolean(USEMSLAB_KEY, USEMSLAB_DEFAULT)) { - String className = conf.get(MSLAB_CLASS_NAME, HeapMemStoreLAB.class.getName()); - this.allocator = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class }, new Object[] { conf }); - } else { - this.allocator = null; - } + public DefaultMemStore(final Configuration conf, final CellComparator c) { + super(conf, c); } void dump() { - for (Cell cell: this.cellSet) { - LOG.info(cell); - } - for (Cell cell: this.snapshot) { - LOG.info(cell); - } + super.dump(LOG); } /** @@ -148,148 +85,27 @@ public class DefaultMemStore implements MemStore { public MemStoreSnapshot snapshot() { // If snapshot currently has entries, then flusher failed or didn't call // cleanup. Log a warning. - if (!this.snapshot.isEmpty()) { + if (!getSnapshot().isEmpty()) { LOG.warn("Snapshot called again without clearing previous. " + "Doing nothing. 
Another ongoing flush or did we fail last attempt?"); } else { this.snapshotId = EnvironmentEdgeManager.currentTime(); - this.snapshotSize = keySize(); - if (!this.cellSet.isEmpty()) { - this.snapshot = this.cellSet; - this.cellSet = new CellSkipListSet(this.comparator); - this.snapshotTimeRangeTracker = this.timeRangeTracker; - this.timeRangeTracker = new TimeRangeTracker(); - // Reset heap to not include any keys - this.size.set(DEEP_OVERHEAD); - this.snapshotAllocator = this.allocator; - // Reset allocator so we get a fresh buffer for the new memstore - if (allocator != null) { - String className = conf.get(MSLAB_CLASS_NAME, HeapMemStoreLAB.class.getName()); - this.allocator = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class }, new Object[] { conf }); - } - timeOfOldestEdit = Long.MAX_VALUE; + if (!getActive().isEmpty()) { + setSnapshot(getActive()); + setSnapshotSize(keySize()); + resetCellSet(); } } - MemStoreSnapshot memStoreSnapshot = new MemStoreSnapshot(this.snapshotId, snapshot.size(), this.snapshotSize, - this.snapshotTimeRangeTracker, new CollectionBackedScanner(snapshot, this.comparator), - this.tagsPresent); - this.tagsPresent = false; - return memStoreSnapshot; - } + return new MemStoreSnapshot(this.snapshotId, getSnapshot(), getComparator()); - /** - * The passed snapshot was successfully persisted; it can be let go. - * @param id Id of the snapshot to clean out. - * @throws UnexpectedStateException - * @see #snapshot() - */ - @Override - public void clearSnapshot(long id) throws UnexpectedStateException { - MemStoreLAB tmpAllocator = null; - if (this.snapshotId != id) { - throw new UnexpectedStateException("Current snapshot id is " + this.snapshotId + ",passed " - + id); - } - // OK. Passed in snapshot is same as current snapshot. If not-empty, - // create a new snapshot and let the old one go. - if (!this.snapshot.isEmpty()) { - this.snapshot = new CellSkipListSet(this.comparator); - this.snapshotTimeRangeTracker = new TimeRangeTracker(); - } - this.snapshotSize = 0; - this.snapshotId = -1; - if (this.snapshotAllocator != null) { - tmpAllocator = this.snapshotAllocator; - this.snapshotAllocator = null; - } - if (tmpAllocator != null) { - tmpAllocator.close(); - } - } - - @Override - public long getFlushableSize() { - return this.snapshotSize > 0 ? this.snapshotSize : keySize(); - } - - @Override - public long getSnapshotSize() { - return this.snapshotSize; - } - - /** - * Write an update - * @param cell - * @return approximate size of the passed KV & newly added KV which maybe different than the - * passed-in KV - */ - @Override - public Pair add(Cell cell) { - Cell toAdd = maybeCloneWithAllocator(cell); - return new Pair(internalAdd(toAdd), toAdd); } @Override - public long timeOfOldestEdit() { - return timeOfOldestEdit; - } - - private boolean addToCellSet(Cell e) { - boolean b = this.cellSet.add(e); - // In no tags case this NoTagsKeyValue.getTagsLength() is a cheap call. - // When we use ACL CP or Visibility CP which deals with Tags during - // mutation, the TagRewriteCell.getTagsLength() is a cheaper call. We do not - // parse the byte[] to identify the tags length. 
- if(e.getTagsLength() > 0) { - tagsPresent = true; - } - setOldestEditTimeToNow(); - return b; - } - - private boolean removeFromCellSet(Cell e) { - boolean b = this.cellSet.remove(e); - setOldestEditTimeToNow(); - return b; - } - - void setOldestEditTimeToNow() { - if (timeOfOldestEdit == Long.MAX_VALUE) { - timeOfOldestEdit = EnvironmentEdgeManager.currentTime(); - } - } - - /** - * Internal version of add() that doesn't clone Cells with the - * allocator, and doesn't take the lock. - * - * Callers should ensure they already have the read lock taken - */ - private long internalAdd(final Cell toAdd) { - long s = heapSizeChange(toAdd, addToCellSet(toAdd)); - timeRangeTracker.includeTimestamp(toAdd); - this.size.addAndGet(s); - return s; - } - - private Cell maybeCloneWithAllocator(Cell cell) { - if (allocator == null) { - return cell; - } - - int len = KeyValueUtil.length(cell); - ByteRange alloc = allocator.allocateBytes(len); - if (alloc == null) { - // The allocation was too large, allocator decided - // not to do anything with it. - return cell; - } - assert alloc.getBytes() != null; - KeyValueUtil.appendToByteArray(cell, alloc.getBytes(), alloc.getOffset()); - KeyValue newKv = new KeyValue(alloc.getBytes(), alloc.getOffset(), len); - newKv.setSequenceId(cell.getSequenceId()); - return newKv; + protected List getListOfScanners(long readPt) throws IOException { + List list = new ArrayList(2); + list.add(0, getActive().getScanner(readPt)); + list.add(1, getSnapshot().getScanner(readPt)); + return list; } /** @@ -302,637 +118,55 @@ public class DefaultMemStore implements MemStore { */ @Override public void rollback(Cell cell) { - // If the key is in the snapshot, delete it. We should not update - // this.size, because that tracks the size of only the memstore and - // not the snapshot. The flush of this snapshot to disk has not - // yet started because Store.flush() waits for all rwcc transactions to - // commit before starting the flush to disk. - Cell found = this.snapshot.get(cell); - if (found != null && found.getSequenceId() == cell.getSequenceId()) { - this.snapshot.remove(cell); - long sz = heapSizeChange(cell, true); - this.snapshotSize -= sz; - } - // If the key is in the memstore, delete it. Update this.size. - found = this.cellSet.get(cell); - if (found != null && found.getSequenceId() == cell.getSequenceId()) { - removeFromCellSet(cell); - long s = heapSizeChange(cell, true); - this.size.addAndGet(-s); - } - } - - /** - * Write a delete - * @param deleteCell - * @return approximate size of the passed key and value. - */ - @Override - public long delete(Cell deleteCell) { - long s = 0; - Cell toAdd = maybeCloneWithAllocator(deleteCell); - s += heapSizeChange(toAdd, addToCellSet(toAdd)); - timeRangeTracker.includeTimestamp(toAdd); - this.size.addAndGet(s); - return s; - } - - /** - * @param cell Find the row that comes after this one. If null, we return the - * first. - * @return Next row or null if none found. - */ - Cell getNextRow(final Cell cell) { - return getLowest(getNextRow(cell, this.cellSet), getNextRow(cell, this.snapshot)); - } - - /* - * @param a - * @param b - * @return Return lowest of a or b or null if both a and b are null - */ - private Cell getLowest(final Cell a, final Cell b) { - if (a == null) { - return b; - } - if (b == null) { - return a; - } - return comparator.compareRows(a, b) <= 0? a: b; - } - - /* - * @param key Find row that follows this one. If null, return first. - * @param map Set to look in for a row beyond row. 
- * @return Next row or null if none found. If one found, will be a new - * KeyValue -- can be destroyed by subsequent calls to this method. - */ - private Cell getNextRow(final Cell key, - final NavigableSet set) { - Cell result = null; - SortedSet tail = key == null? set: set.tailSet(key); - // Iterate until we fall into the next row; i.e. move off current row - for (Cell cell: tail) { - if (comparator.compareRows(cell, key) <= 0) - continue; - // Note: Not suppressing deletes or expired cells. Needs to be handled - // by higher up functions. - result = cell; - break; - } - return result; + rollbackSnapshot(cell); + rollbackCellSet(cell); } /** - * Only used by tests. TODO: Remove - * - * Given the specs of a column, update it, first by inserting a new record, - * then removing the old one. Since there is only 1 KeyValue involved, the memstoreTS - * will be set to 0, thus ensuring that they instantly appear to anyone. The underlying - * store will ensure that the insert/delete each are atomic. A scanner/reader will either - * get the new value, or the old value and all readers will eventually only see the new - * value after the old was removed. - * - * @param row - * @param family - * @param qualifier - * @param newValue - * @param now - * @return Timestamp + * Check if this memstore may contain the required keys + * @param scan + * @return False if the key definitely does not exist in this memstore */ @Override - public long updateColumnValue(byte[] row, - byte[] family, - byte[] qualifier, - long newValue, - long now) { - Cell firstCell = KeyValueUtil.createFirstOnRow(row, family, qualifier); - // Is there a Cell in 'snapshot' with the same TS? If so, upgrade the timestamp a bit. - SortedSet snSs = snapshot.tailSet(firstCell); - if (!snSs.isEmpty()) { - Cell snc = snSs.first(); - // is there a matching Cell in the snapshot? - if (CellUtil.matchingRow(snc, firstCell) && CellUtil.matchingQualifier(snc, firstCell)) { - if (snc.getTimestamp() == now) { - // poop, - now += 1; - } - } - } - - // logic here: the new ts MUST be at least 'now'. But it could be larger if necessary. - // But the timestamp should also be max(now, mostRecentTsInMemstore) - - // so we cant add the new Cell w/o knowing what's there already, but we also - // want to take this chance to delete some cells. So two loops (sad) - - SortedSet ss = cellSet.tailSet(firstCell); - for (Cell cell : ss) { - // if this isnt the row we are interested in, then bail: - if (!CellUtil.matchingColumn(cell, family, qualifier) - || !CellUtil.matchingRow(cell, firstCell)) { - break; // rows dont match, bail. - } - - // if the qualifier matches and it's a put, just RM it out of the cellSet. - if (cell.getTypeByte() == KeyValue.Type.Put.getCode() && - cell.getTimestamp() > now && CellUtil.matchingQualifier(firstCell, cell)) { - now = cell.getTimestamp(); - } - } - - // create or update (upsert) a new Cell with - // 'now' and a 0 memstoreTS == immediately visible - List cells = new ArrayList(1); - cells.add(new KeyValue(row, family, qualifier, now, Bytes.toBytes(newValue))); - return upsert(cells, 1L); + public boolean shouldSeek(Scan scan, long oldestUnexpiredTS) { + return + (getActive().shouldSeek(scan, oldestUnexpiredTS) || + getSnapshot().shouldSeek(scan,oldestUnexpiredTS)); } - /** - * Update or insert the specified KeyValues. - *

- * For each KeyValue, insert into MemStore. This will atomically upsert the - * value for that row/family/qualifier. If a KeyValue did already exist, - * it will then be removed. - *

- * Currently the memstoreTS is kept at 0 so as each insert happens, it will - * be immediately visible. May want to change this so it is atomic across - * all KeyValues. - *

- * This is called under row lock, so Get operations will still see updates - * atomically. Scans will only see each KeyValue update as atomic. - * - * @param cells - * @param readpoint readpoint below which we can safely remove duplicate KVs - * @return change in memstore size - */ @Override - public long upsert(Iterable<Cell> cells, long readpoint) { - long size = 0; - for (Cell cell : cells) { - size += upsert(cell, readpoint); - } - return size; + public long getFlushableSize() { + long snapshotSize = getSnapshot().getSize(); + return snapshotSize > 0 ? snapshotSize : keySize(); } - /** - * Inserts the specified KeyValue into MemStore and deletes any existing - * versions of the same row/family/qualifier as the specified KeyValue. - *

- * First, the specified KeyValue is inserted into the Memstore. - *

- * If there are any existing KeyValues in this MemStore with the same row, - * family, and qualifier, they are removed. - *

- * Callers must hold the read lock. - * - * @param cell - * @return change in size of MemStore - */ - private long upsert(Cell cell, long readpoint) { - // Add the Cell to the MemStore - // Use the internalAdd method here since we (a) already have a lock - // and (b) cannot safely use the MSLAB here without potentially - // hitting OOME - see TestMemStore.testUpsertMSLAB for a - // test that triggers the pathological case if we don't avoid MSLAB - // here. - long addedSize = internalAdd(cell); - - // Get the Cells for the row/family/qualifier regardless of timestamp. - // For this case we want to clean up any other puts - Cell firstCell = KeyValueUtil.createFirstOnRow( - cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), - cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), - cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); - SortedSet ss = cellSet.tailSet(firstCell); - Iterator it = ss.iterator(); - // versions visible to oldest scanner - int versionsVisible = 0; - while ( it.hasNext() ) { - Cell cur = it.next(); - - if (cell == cur) { - // ignore the one just put in - continue; - } - // check that this is the row and column we are interested in, otherwise bail - if (CellUtil.matchingRow(cell, cur) && CellUtil.matchingQualifier(cell, cur)) { - // only remove Puts that concurrent scanners cannot possibly see - if (cur.getTypeByte() == KeyValue.Type.Put.getCode() && - cur.getSequenceId() <= readpoint) { - if (versionsVisible >= 1) { - // if we get here we have seen at least one version visible to the oldest scanner, - // which means we can prove that no scanner will see this version - - // false means there was a change, so give us the size. - long delta = heapSizeChange(cur, true); - addedSize -= delta; - this.size.addAndGet(-delta); - it.remove(); - setOldestEditTimeToNow(); - } else { - versionsVisible++; - } - } - } else { - // past the row or column, done - break; - } - } - return addedSize; - } + //methods for tests /** - * @return scanner on memstore and snapshot in this order. + * @param cell Find the row that comes after this one. If null, we return the + * first. + * @return Next row or null if none found. */ - @Override - public List getScanners(long readPt) { - return Collections. singletonList(new MemStoreScanner(readPt)); + Cell getNextRow(final Cell cell) { + return getLowest( + getNextRow(cell, getActive().getCellSet()), + getNextRow(cell, getSnapshot().getCellSet())); } /** - * Check if this memstore may contain the required keys - * @param scan - * @return False if the key definitely does not exist in this Memstore + * @return Total memory occupied by this MemStore. */ - public boolean shouldSeek(Scan scan, long oldestUnexpiredTS) { - return (timeRangeTracker.includesTimeRange(scan.getTimeRange()) || - snapshotTimeRangeTracker.includesTimeRange(scan.getTimeRange())) - && (Math.max(timeRangeTracker.getMaximumTimestamp(), - snapshotTimeRangeTracker.getMaximumTimestamp()) >= - oldestUnexpiredTS); - } - - /* - * MemStoreScanner implements the KeyValueScanner. - * It lets the caller scan the contents of a memstore -- both current - * map and snapshot. - * This behaves as if it were a real scanner but does not maintain position. 
- */ - protected class MemStoreScanner extends NonLazyKeyValueScanner { - // Next row information for either cellSet or snapshot - private Cell cellSetNextRow = null; - private Cell snapshotNextRow = null; - - // last iterated Cells for cellSet and snapshot (to restore iterator state after reseek) - private Cell cellSetItRow = null; - private Cell snapshotItRow = null; - - // iterator based scanning. - private Iterator cellSetIt; - private Iterator snapshotIt; - - // The cellSet and snapshot at the time of creating this scanner - private CellSkipListSet cellSetAtCreation; - private CellSkipListSet snapshotAtCreation; - - // the pre-calculated Cell to be returned by peek() or next() - private Cell theNext; - - // The allocator and snapshot allocator at the time of creating this scanner - volatile MemStoreLAB allocatorAtCreation; - volatile MemStoreLAB snapshotAllocatorAtCreation; - - // A flag represents whether could stop skipping Cells for MVCC - // if have encountered the next row. Only used for reversed scan - private boolean stopSkippingCellsIfNextRow = false; - - private long readPoint; - - /* - Some notes... - - So memstorescanner is fixed at creation time. this includes pointers/iterators into - existing kvset/snapshot. during a snapshot creation, the kvset is null, and the - snapshot is moved. since kvset is null there is no point on reseeking on both, - we can save us the trouble. During the snapshot->hfile transition, the memstore - scanner is re-created by StoreScanner#updateReaders(). StoreScanner should - potentially do something smarter by adjusting the existing memstore scanner. - - But there is a greater problem here, that being once a scanner has progressed - during a snapshot scenario, we currently iterate past the kvset then 'finish' up. - if a scan lasts a little while, there is a chance for new entries in kvset to - become available but we will never see them. This needs to be handled at the - StoreScanner level with coordination with MemStoreScanner. - - Currently, this problem is only partly managed: during the small amount of time - when the StoreScanner has not yet created a new MemStoreScanner, we will miss - the adds to kvset in the MemStoreScanner. - */ - - MemStoreScanner(long readPoint) { - super(); - - this.readPoint = readPoint; - cellSetAtCreation = cellSet; - snapshotAtCreation = snapshot; - if (allocator != null) { - this.allocatorAtCreation = allocator; - this.allocatorAtCreation.incScannerCount(); - } - if (snapshotAllocator != null) { - this.snapshotAllocatorAtCreation = snapshotAllocator; - this.snapshotAllocatorAtCreation.incScannerCount(); - } - if (Trace.isTracing() && Trace.currentSpan() != null) { - Trace.currentSpan().addTimelineAnnotation("Creating MemStoreScanner"); - } - } - - /** - * Lock on 'this' must be held by caller. - * @param it - * @return Next Cell - */ - private Cell getNext(Iterator it) { - Cell startCell = theNext; - Cell v = null; - try { - while (it.hasNext()) { - v = it.next(); - if (v.getSequenceId() <= this.readPoint) { - return v; - } - if (stopSkippingCellsIfNextRow && startCell != null - && comparator.compareRows(v, startCell) > 0) { - return null; - } - } - - return null; - } finally { - if (v != null) { - // in all cases, remember the last Cell iterated to - if (it == snapshotIt) { - snapshotItRow = v; - } else { - cellSetItRow = v; - } - } - } - } - - /** - * Set the scanner at the seek key. - * Must be called only once: there is no thread safety between the scanner - * and the memStore. 
- * @param key seek value - * @return false if the key is null or if there is no data - */ - @Override - public synchronized boolean seek(Cell key) { - if (key == null) { - close(); - return false; - } - // kvset and snapshot will never be null. - // if tailSet can't find anything, SortedSet is empty (not null). - cellSetIt = cellSetAtCreation.tailSet(key).iterator(); - snapshotIt = snapshotAtCreation.tailSet(key).iterator(); - cellSetItRow = null; - snapshotItRow = null; - - return seekInSubLists(key); - } - - - /** - * (Re)initialize the iterators after a seek or a reseek. - */ - private synchronized boolean seekInSubLists(Cell key){ - cellSetNextRow = getNext(cellSetIt); - snapshotNextRow = getNext(snapshotIt); - - // Calculate the next value - theNext = getLowest(cellSetNextRow, snapshotNextRow); - - // has data - return (theNext != null); - } - - - /** - * Move forward on the sub-lists set previously by seek. - * @param key seek value (should be non-null) - * @return true if there is at least one KV to read, false otherwise - */ - @Override - public synchronized boolean reseek(Cell key) { - /* - See HBASE-4195 & HBASE-3855 & HBASE-6591 for the background on this implementation. - This code is executed concurrently with flush and puts, without locks. - Two points must be known when working on this code: - 1) It's not possible to use the 'kvTail' and 'snapshot' - variables, as they are modified during a flush. - 2) The ideal implementation for performance would use the sub skip list - implicitly pointed by the iterators 'kvsetIt' and - 'snapshotIt'. Unfortunately the Java API does not offer a method to - get it. So we remember the last keys we iterated to and restore - the reseeked set to at least that point. - */ - cellSetIt = cellSetAtCreation.tailSet(getHighest(key, cellSetItRow)).iterator(); - snapshotIt = snapshotAtCreation.tailSet(getHighest(key, snapshotItRow)).iterator(); - - return seekInSubLists(key); - } - - - @Override - public synchronized Cell peek() { - //DebugPrint.println(" MS@" + hashCode() + " peek = " + getLowest()); - return theNext; - } - - @Override - public synchronized Cell next() { - if (theNext == null) { - return null; - } - - final Cell ret = theNext; - - // Advance one of the iterators - if (theNext == cellSetNextRow) { - cellSetNextRow = getNext(cellSetIt); - } else { - snapshotNextRow = getNext(snapshotIt); - } - - // Calculate the next value - theNext = getLowest(cellSetNextRow, snapshotNextRow); - - //long readpoint = ReadWriteConsistencyControl.getThreadReadPoint(); - //DebugPrint.println(" MS@" + hashCode() + " next: " + theNext + " next_next: " + - // getLowest() + " threadpoint=" + readpoint); - return ret; - } - - /* - * Returns the lower of the two key values, or null if they are both null. - * This uses comparator.compare() to compare the KeyValue using the memstore - * comparator. - */ - private Cell getLowest(Cell first, Cell second) { - if (first == null && second == null) { - return null; - } - if (first != null && second != null) { - int compare = comparator.compare(first, second); - return (compare <= 0 ? first : second); - } - return (first != null ? first : second); - } - - /* - * Returns the higher of the two cells, or null if they are both null. - * This uses comparator.compare() to compare the Cell using the memstore - * comparator. 
- */ - private Cell getHighest(Cell first, Cell second) { - if (first == null && second == null) { - return null; - } - if (first != null && second != null) { - int compare = comparator.compare(first, second); - return (compare > 0 ? first : second); - } - return (first != null ? first : second); - } - - public synchronized void close() { - this.cellSetNextRow = null; - this.snapshotNextRow = null; - - this.cellSetIt = null; - this.snapshotIt = null; - - if (allocatorAtCreation != null) { - this.allocatorAtCreation.decScannerCount(); - this.allocatorAtCreation = null; - } - if (snapshotAllocatorAtCreation != null) { - this.snapshotAllocatorAtCreation.decScannerCount(); - this.snapshotAllocatorAtCreation = null; - } - - this.cellSetItRow = null; - this.snapshotItRow = null; - } - - /** - * MemStoreScanner returns max value as sequence id because it will - * always have the latest data among all files. - */ - @Override - public long getSequenceID() { - return Long.MAX_VALUE; - } - - @Override - public boolean shouldUseScanner(Scan scan, SortedSet columns, - long oldestUnexpiredTS) { - return shouldSeek(scan, oldestUnexpiredTS); - } - - /** - * Seek scanner to the given key first. If it returns false(means - * peek()==null) or scanner's peek row is bigger than row of given key, seek - * the scanner to the previous row of given key - */ - @Override - public synchronized boolean backwardSeek(Cell key) { - seek(key); - if (peek() == null || comparator.compareRows(peek(), key) > 0) { - return seekToPreviousRow(key); - } - return true; - } - - /** - * Separately get the KeyValue before the specified key from kvset and - * snapshotset, and use the row of higher one as the previous row of - * specified key, then seek to the first KeyValue of previous row - */ - @Override - public synchronized boolean seekToPreviousRow(Cell key) { - Cell firstKeyOnRow = CellUtil.createFirstOnRow(key); - SortedSet cellHead = cellSetAtCreation.headSet(firstKeyOnRow); - Cell cellSetBeforeRow = cellHead.isEmpty() ? null : cellHead.last(); - SortedSet snapshotHead = snapshotAtCreation - .headSet(firstKeyOnRow); - Cell snapshotBeforeRow = snapshotHead.isEmpty() ? null : snapshotHead - .last(); - Cell lastCellBeforeRow = getHighest(cellSetBeforeRow, snapshotBeforeRow); - if (lastCellBeforeRow == null) { - theNext = null; - return false; - } - Cell firstKeyOnPreviousRow = CellUtil.createFirstOnRow(lastCellBeforeRow); - this.stopSkippingCellsIfNextRow = true; - seek(firstKeyOnPreviousRow); - this.stopSkippingCellsIfNextRow = false; - if (peek() == null - || comparator.compareRows(peek(), firstKeyOnPreviousRow) > 0) { - return seekToPreviousRow(lastCellBeforeRow); - } - return true; - } - - @Override - public synchronized boolean seekToLastRow() { - Cell first = cellSetAtCreation.isEmpty() ? null : cellSetAtCreation - .last(); - Cell second = snapshotAtCreation.isEmpty() ? 
null - : snapshotAtCreation.last(); - Cell higherCell = getHighest(first, second); - if (higherCell == null) { - return false; - } - Cell firstCellOnLastRow = CellUtil.createFirstOnRow(higherCell); - if (seek(firstCellOnLastRow)) { - return true; - } else { - return seekToPreviousRow(higherCell); - } - - } - } - - public final static long FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT - + (9 * ClassSize.REFERENCE) + (3 * Bytes.SIZEOF_LONG) + Bytes.SIZEOF_BOOLEAN); - - public final static long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD + - ClassSize.ATOMIC_LONG + (2 * ClassSize.TIMERANGE_TRACKER) + - (2 * ClassSize.CELL_SKIPLIST_SET) + (2 * ClassSize.CONCURRENT_SKIPLISTMAP)); - - /* - * Calculate how the MemStore size has changed. Includes overhead of the - * backing Map. - * @param cell - * @param notpresent True if the cell was NOT present in the set. - * @return Size - */ - static long heapSizeChange(final Cell cell, final boolean notpresent) { - return notpresent ? ClassSize.align(ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY - + CellUtil.estimatedHeapSizeOf(cell)) : 0; - } - - private long keySize() { - return heapSize() - DEEP_OVERHEAD; + @Override public long size() { + return heapSize(); } - /** - * Get the entire heap usage for this MemStore not including keys in the - * snapshot. - */ - @Override - public long heapSize() { - return size.get(); + @Override public AbstractMemStore setForceFlush() { + // do nothing + return this; } - @Override - public long size() { - return heapSize(); + @Override public boolean isMemstoreCompaction() { + return false; } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index c0df4bf..a6c34dd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -276,7 +276,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // TODO: account for each registered handler in HeapSize computation private Map coprocessorServiceHandlers = Maps.newHashMap(); - public final AtomicLong memstoreSize = new AtomicLong(0); + public final AtomicLong memstoreSize = new AtomicLong(0); // size of active set in memstore + // size of additional memstore buckets, e.g., in compaction pipeline + public final AtomicLong memstoreAdditionalSize = new AtomicLong(0); // Debug possible data loss due to WAL off final Counter numMutationsWithoutWAL = new Counter(); @@ -574,6 +576,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi private long flushCheckInterval; // flushPerChanges is to prevent too many changes in memstore private long flushPerChanges; + // force flush size is set to be the average of flush size and blocking size + private long memStoreForceFlushSize; private long blockingMemStoreSize; final long threadWakeFrequency; // Used to guard closes @@ -760,6 +764,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi this.blockingMemStoreSize = this.memstoreFlushSize * conf.getLong(HConstants.HREGION_MEMSTORE_BLOCK_MULTIPLIER, HConstants.DEFAULT_HREGION_MEMSTORE_BLOCK_MULTIPLIER); + // set force flush size to be between flush size and blocking size + this.memStoreForceFlushSize = (this.memstoreFlushSize + this.blockingMemStoreSize) / 2; } /** @@ -1078,6 +1084,15 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi 
return this.memstoreSize.addAndGet(memStoreSize); } + public long addAndGetGlobalMemstoreAdditionalSize(long size) { + if (this.rsAccounting != null) { + rsAccounting.addAndGetGlobalMemstoreAdditionalSize(size); + } + return this.memstoreAdditionalSize.addAndGet(size); + } + + + @Override public HRegionInfo getRegionInfo() { return this.fs.getRegionInfo(); @@ -1116,6 +1131,15 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi return memstoreSize.get(); } + private long getMemstoreAdditionalSize() { + return memstoreAdditionalSize.get(); + } + + @Override + public long getMemstoreTotalSize() { + return getMemstoreSize() + getMemstoreAdditionalSize(); + } + @Override public long getNumMutationsWithoutWAL() { return numMutationsWithoutWAL.get(); @@ -1406,7 +1430,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // Don't flush the cache if we are aborting if (!abort && canFlush) { int flushCount = 0; - while (this.memstoreSize.get() > 0) { + while (this.getMemstoreTotalSize() > 0) { try { if (flushCount++ > 0) { int actualFlushes = flushCount - 1; @@ -1414,8 +1438,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // If we tried 5 times and are unable to clear memory, abort // so we do not lose data throw new DroppedSnapshotException("Failed clearing memory after " + - actualFlushes + " attempts on region: " + - Bytes.toStringBinary(getRegionInfo().getRegionName())); + actualFlushes + " attempts on region: " + Bytes.toStringBinary(getRegionInfo().getRegionName()) + + " memstore size: " + getMemstoreSize() + " total size (memstore + pipeline)" + + ": " + getMemstoreTotalSize()); } LOG.info("Running extra flush, " + actualFlushes + " (carrying snapshot?) " + this); @@ -1480,6 +1505,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } this.closed.set(true); + if (getMemstoreTotalSize() != 0) LOG.error("Memstore size is " + getMemstoreTotalSize()); if (!canFlush) { addAndGetGlobalMemstoreSize(-memstoreSize.get()); } else if (memstoreSize.get() != 0) { @@ -1832,10 +1858,23 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } @Override + public FlushResult flush(boolean force,boolean forceFlushInsteadOfCompaction) throws IOException { + boolean writeFlushRequestWalMarker = false; + return flushcache(force, writeFlushRequestWalMarker,forceFlushInsteadOfCompaction); + } + + @Override public FlushResult flush(boolean force) throws IOException { - return flushcache(force, false); + boolean writeFlushRequestWalMarker = false; + return flushcache(force, writeFlushRequestWalMarker); } + public FlushResult flushcache(boolean forceFlushAllStores, boolean writeFlushRequestWalMarker) + throws IOException { + boolean forceFlushInsteadOfCompaction = true; + return flushcache(forceFlushAllStores, writeFlushRequestWalMarker, + forceFlushInsteadOfCompaction); + } /** * Flush the cache. * @@ -1858,8 +1897,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * because a Snapshot was not properly persisted. The region is put in closing mode, and the * caller MUST abort after this. 
*/ - public FlushResult flushcache(boolean forceFlushAllStores, boolean writeFlushRequestWalMarker) - throws IOException { + public FlushResult flushcache(boolean forceFlushAllStores, boolean writeFlushRequestWalMarker, + boolean forceFlushInsteadOfCompaction) throws IOException { // fail-fast instead of waiting on the lock if (this.closing.get()) { String msg = "Skipping flush on " + this + " because closing"; @@ -1905,6 +1944,11 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } try { + if(forceFlushInsteadOfCompaction) { + for(Store s : stores.values()) { + s.setForceFlush(); + } + } Collection specificStoresToFlush = forceFlushAllStores ? stores.values() : flushPolicy.selectStoresToFlush(); FlushResult fs = internalFlushcache(specificStoresToFlush, @@ -2006,6 +2050,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi */ private FlushResult internalFlushcache(MonitoredTask status) throws IOException { + for(Store s : stores.values()) { + s.setForceFlush(); + } return internalFlushcache(stores.values(), status, false); } @@ -2071,13 +2118,16 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } final long startTime = EnvironmentEdgeManager.currentTime(); // If nothing to flush, return, but we need to safely update the region sequence id - if (this.memstoreSize.get() <= 0) { + if (getMemstoreTotalSize() <= 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("Empty memstore size for the current region " + this); + } // Take an update lock because am about to change the sequence id and we want the sequence id // to be at the border of the empty memstore. MultiVersionConsistencyControl.WriteEntry w = null; this.updatesLock.writeLock().lock(); try { - if (this.memstoreSize.get() <= 0) { + if (this.getMemstoreTotalSize() <= 0) { // Presume that if there are still no edits in the memstore, then there are no edits for // this region out in the WAL subsystem so no need to do any trickery clearing out // edits in the WAL system. Up the sequence number so the resulting flush id is for @@ -2781,10 +2831,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi initialized = true; } long addedSize = doMiniBatchMutation(batchOp); - long newSize = this.addAndGetGlobalMemstoreSize(addedSize); - if (isFlushSize(newSize)) { - requestFlush(); - } + this.addAndGetGlobalMemstoreSize(addedSize); + requestFlushIfNeeded(); } } finally { closeRegionOperation(op); @@ -3537,20 +3585,61 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // If catalog region, do not impose resource constraints or block updates. if (this.getRegionInfo().isMetaRegion()) return; - if (this.memstoreSize.get() > this.blockingMemStoreSize) { + long memstoreSize = this.getMemstoreTotalSize(); + // block writes and force flush + if (memstoreSize > this.blockingMemStoreSize) { blockedRequestsCount.increment(); - requestFlush(); + requestAndForceFlush(false); throw new RegionTooBusyException("Above memstore limit, " + "regionName=" + (this.getRegionInfo() == null ? "unknown" : this.getRegionInfo().getRegionNameAsString()) + ", server=" + (this.getRegionServerServices() == null ? 
"unknown" : this.getRegionServerServices().getServerName()) + - ", memstoreSize=" + memstoreSize.get() + + ", memstoreSize=" + memstoreSize + ", blockingMemStoreSize=" + blockingMemStoreSize); } } /** + * requests flush if the size of all memstores in region exceeds the flush thresholds; force + * the flush if it exceeds the force flush threshold + * @throws RegionTooBusyException + */ + private void requestFlushIfNeeded() throws RegionTooBusyException { + long memstoreSize = this.getMemstoreSize(); + long memstoreTotalSize = this.getMemstoreTotalSize(); // including compaction pipelines + + // force flush + if (memstoreTotalSize > this.memStoreForceFlushSize) { + requestAndForceFlush(true); + return; + } + + // (regular) flush + if (memstoreSize > this.memstoreFlushSize) { + requestFlush(); + } + } + + /** + * request flush. + * If the memstore is not in compaction or we do not need to wait for compactions to end then + * force the flush. + * @param waitForCompactions whether to wait for the compaction to end or to force the flush + * without waiting + */ + private void requestAndForceFlush(boolean waitForCompactions) { + for (Store s : stores.values()) { + if(waitForCompactions && s.isMemstoreCompaction()) { + // do not force flush if memstore compaction is in progress + continue; + } + s.setForceFlush(); + } + requestFlush(); + } + + /** * @throws IOException Throws exception if region is in read-only mode. */ protected void checkReadOnly() throws IOException { @@ -4014,7 +4103,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi editsCount++; } if (flush) { - internalFlushcache(null, currentEditSeqId, stores.values(), status, false); + internalFlushcache(null, currentEditSeqId, stores.values(), status, false);//force flush } if (coprocessorHost != null) { @@ -4446,7 +4535,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi /** * Drops the memstore contents after replaying a flush descriptor or region open event replay * if the memstore edits have seqNums smaller than the given seq id - * @param flush the flush descriptor * @throws IOException */ private long dropMemstoreContentsForSeqId(long seqId, Store store) throws IOException { @@ -6539,7 +6627,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi return null; } ClientProtos.RegionLoadStats.Builder stats = ClientProtos.RegionLoadStats.newBuilder(); - stats.setMemstoreLoad((int) (Math.min(100, (this.memstoreSize.get() * 100) / this + stats.setMemstoreLoad((int) (Math.min(100, (this.getMemstoreTotalSize() * 100) / this .memstoreFlushSize))); stats.setHeapOccupancy((int)rsServices.getHeapMemoryManager().getHeapOccupancyPercent()*100); return stats.build(); @@ -6706,9 +6794,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } finally { closeRegionOperation(); - if (!mutations.isEmpty() && - isFlushSize(this.addAndGetGlobalMemstoreSize(addedSize))) { - requestFlush(); + if (!mutations.isEmpty()) { + this.addAndGetGlobalMemstoreSize(addedSize); + requestFlushIfNeeded(); } } } @@ -6972,8 +7060,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // Append a faked WALEdit in order for SKIP_WAL updates to get mvcc assigned walKey = this.appendEmptyEdit(this.wal, memstoreCells); } - size = this.addAndGetGlobalMemstoreSize(size); - flush = isFlushSize(size); + this.addAndGetGlobalMemstoreSize(size); } finally { this.updatesLock.readLock().unlock(); } @@ -7004,10 +7091,8 @@ 
public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi this.metricsRegion.updateAppend(); } - if (flush) { - // Request a cache flush. Do it outside update lock. - requestFlush(); - } + // Request a cache flush. Do it outside update lock. + requestFlushIfNeeded(); return append.isReturnResults() ? Result.create(allKVs) : null; @@ -7192,8 +7277,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } } } - size = this.addAndGetGlobalMemstoreSize(size); - flush = isFlushSize(size); + this.addAndGetGlobalMemstoreSize(size); } // Actually write to WAL now @@ -7244,10 +7328,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } } - if (flush) { - // Request a cache flush. Do it outside update lock. - requestFlush(); - } + // Request a cache flush. Do it outside update lock. + requestFlushIfNeeded(); + return increment.isReturnResults() ? Result.create(allKVs) : null; } @@ -7267,8 +7350,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final long FIXED_OVERHEAD = ClassSize.align( ClassSize.OBJECT + ClassSize.ARRAY + - 44 * ClassSize.REFERENCE + 3 * Bytes.SIZEOF_INT + - (14 * Bytes.SIZEOF_LONG) + + 45 * ClassSize.REFERENCE + 3 * Bytes.SIZEOF_INT + + (15 * Bytes.SIZEOF_LONG) + 5 * Bytes.SIZEOF_BOOLEAN); // woefully out of date - currently missing: diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 315659a..3c22b3d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -18,40 +18,16 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.lang.Thread.UncaughtExceptionHandler; -import java.lang.management.ManagementFactory; -import java.lang.management.MemoryUsage; -import java.lang.reflect.Constructor; -import java.net.BindException; -import java.net.InetAddress; -import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.ReentrantReadWriteLock; - -import javax.management.MalformedObjectNameException; -import javax.management.ObjectName; -import javax.servlet.http.HttpServlet; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import com.google.protobuf.BlockingRpcChannel; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Message; +import com.google.protobuf.RpcCallback; +import com.google.protobuf.RpcController; +import com.google.protobuf.Service; +import com.google.protobuf.ServiceException; import org.apache.commons.lang.math.RandomUtils; import 
org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -177,16 +153,38 @@ import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.data.Stat; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; -import com.google.protobuf.BlockingRpcChannel; -import com.google.protobuf.Descriptors; -import com.google.protobuf.Message; -import com.google.protobuf.RpcCallback; -import com.google.protobuf.RpcController; -import com.google.protobuf.Service; -import com.google.protobuf.ServiceException; +import javax.management.MalformedObjectNameException; +import javax.management.ObjectName; +import javax.servlet.http.HttpServlet; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.lang.Thread.UncaughtExceptionHandler; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryUsage; +import java.lang.reflect.Constructor; +import java.net.BindException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReentrantReadWriteLock; /** * HRegionServer makes a set of HRegions available to clients. 
It checks in with @@ -1438,7 +1436,7 @@ public class HRegionServer extends HasThread implements int storefiles = 0; int storeUncompressedSizeMB = 0; int storefileSizeMB = 0; - int memstoreSizeMB = (int) (r.getMemstoreSize() / 1024 / 1024); + int memstoreSizeMB = (int) (r.getMemstoreTotalSize() / 1024 / 1024); int storefileIndexSizeMB = 0; int rootIndexSizeKB = 0; int totalStaticIndexSizeKB = 0; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 7569e7a..a965c69 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -18,30 +18,12 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.net.InetSocketAddress; -import java.security.Key; -import java.security.KeyException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableSet; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.Future; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.locks.ReentrantReadWriteLock; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableCollection; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -78,9 +60,9 @@ import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; +import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController; import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; import org.apache.hadoop.hbase.regionserver.compactions.OffPeakHours; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController; import org.apache.hadoop.hbase.regionserver.wal.WALUtil; import org.apache.hadoop.hbase.security.EncryptionUtil; import org.apache.hadoop.hbase.security.User; @@ -94,12 +76,29 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableCollection; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.InetSocketAddress; +import java.security.Key; +import java.security.KeyException; +import 
java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableSet; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; /** * A Store holds a column family in a Region. Its a memstore and a set of zero @@ -125,7 +124,7 @@ public class HStore implements Store { private static final Log LOG = LogFactory.getLog(HStore.class); - protected final MemStore memstore; + protected final AbstractMemStore memstore; // This stores directory in the filesystem. protected final HRegion region; private final HColumnDescriptor family; @@ -232,9 +231,13 @@ public class HStore implements Store { // Why not just pass a HColumnDescriptor in here altogether? Even if have // to clone it? scanInfo = new ScanInfo(family, ttl, timeToPurgeDeletes, this.comparator); - String className = conf.get(MEMSTORE_CLASS_NAME, DefaultMemStore.class.getName()); - this.memstore = ReflectionUtils.instantiateWithCustomCtor(className, new Class[] { - Configuration.class, CellComparator.class }, new Object[] { conf, this.comparator }); + if(family.isInMemory()) { + this.memstore = new CompactedMemStore(conf, this.comparator, this); + } else { + String className = conf.get(MEMSTORE_CLASS_NAME, DefaultMemStore.class.getName()); + this.memstore = ReflectionUtils.instantiateWithCustomCtor(className, new Class[] { + Configuration.class, CellComparator.class }, new Object[] { conf, this.comparator }); + } this.offPeakHours = OffPeakHours.getInstance(conf); // Setting up cache configuration for this family @@ -1980,8 +1983,6 @@ public class HStore implements Store { } /** - * Used in tests. TODO: Remove - * * Updates the value for the given row/family/qualifier. This function will always be seen as * atomic by other readers because it only puts a single KV to memstore. Thus no read/write * control necessary. 
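A note on the memstore selection above: HStore's constructor now decides the memstore implementation per column family. Families flagged as in-memory get the new CompactedMemStore unconditionally, while all other families keep the reflective instantiation of whatever class is named under MEMSTORE_CLASS_NAME. The following is a minimal sketch of that dispatch; the nested types are stand-ins for the real HBase classes (only the selection logic mirrors the patch), so treat it as an illustration rather than the actual HStore code.

    // Sketch of the per-family memstore dispatch; all types here are stand-ins.
    public class MemStoreSelectionSketch {

      interface MemStore { }

      static class DefaultMemStore implements MemStore { }

      // Stand-in for the compacting memstore this patch introduces.
      static class CompactedMemStore implements MemStore { }

      // Stand-in for HColumnDescriptor; only the in-memory flag matters here.
      static class FamilyDescriptor {
        private final boolean inMemory;

        FamilyDescriptor(boolean inMemory) { this.inMemory = inMemory; }

        boolean isInMemory() { return inMemory; }
      }

      // In-memory families get the compacting memstore; all other families keep
      // the default (the real code instantiates the MEMSTORE_CLASS_NAME class
      // reflectively instead of calling a constructor directly).
      static MemStore createMemStore(FamilyDescriptor family) {
        return family.isInMemory() ? new CompactedMemStore() : new DefaultMemStore();
      }

      public static void main(String[] args) {
        System.out.println(createMemStore(new FamilyDescriptor(true)).getClass().getSimpleName());
        System.out.println(createMemStore(new FamilyDescriptor(false)).getClass().getSimpleName());
      }
    }

One consequence of keying the choice off the family descriptor is that a single region can mix both memstore types across its stores, which is why the flush-related bookkeeping (setForceFlush, isMemstoreCompaction) is threaded through the Store interface rather than configured globally.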
@@ -1992,6 +1993,7 @@ public class HStore implements Store { * @return memstore size delta * @throws IOException */ + @VisibleForTesting public long updateColumnValue(byte [] row, byte [] f, byte [] qualifier, long newValue) throws IOException { @@ -2270,4 +2272,14 @@ public class HStore implements Store { public double getCompactionPressure() { return storeEngine.getStoreFileManager().getCompactionPressure(); } + + @Override + public void setForceFlush() { + this.memstore.setForceFlush(); + } + + @Override + public boolean isMemstoreCompaction() { + return memstore.isMemstoreCompaction(); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java index d24299d..c4dea1e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java @@ -17,13 +17,14 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.util.List; - -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.util.Pair; +import java.io.IOException; +import java.util.List; + /** * The MemStore holds in-memory modifications to the Store. Modifications are {@link Cell}s. *

@@ -130,7 +131,7 @@ public interface MemStore extends HeapSize { * @return scanner over the memstore. This might include scanner over the snapshot when one is * present. */ - List<KeyValueScanner> getScanners(long readPt); + List<KeyValueScanner> getScanners(long readPt) throws IOException; /** * @return Total memory occupied by this MemStore. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreCompactor.java new file mode 100644 index 0000000..d9ee175 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreCompactor.java @@ -0,0 +1,230 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * The ongoing MemStore compaction manager; it dispatches a solo running compaction + * and interrupts the compaction if requested. + * The MemStoreScanner is used to traverse the compaction pipeline. The MemStoreScanner + * is included in the internal store scanner, where all compaction logic is implemented. + * Thread safety: It is assumed that the compaction pipeline is immutable, + * therefore no special synchronization is required.
+ */ +@InterfaceAudience.Private class MemStoreCompactor { + private static final Log LOG = LogFactory.getLog(MemStoreCompactor.class); + + private CompactionPipeline cp; // the subject for compaction + private CompactedMemStore ms; // backward reference + private MemStoreScanner scanner; // scanner for pipeline only + + private StoreScanner compactingScanner; // scanner on top of MemStoreScanner + // that uses ScanQueryMatcher + private Configuration conf; + private long // smallest read point for any ongoing + smallestReadPoint; // MemStore scan + private VersionedSegmentsList // a static version of the MemStoreSegments + versionedList; // list from the pipeline + private final CellComparator comparator; + + private static final ExecutorService pool // Thread pool shared by all scanners + = Executors.newCachedThreadPool(); + private final AtomicBoolean inCompaction = new AtomicBoolean(false); + private final AtomicBoolean isInterrupted = new AtomicBoolean(false); + + /** + * ---------------------------------------------------------------------- + * The constructor is used only to initialize basics; other parameters + * needed to start compaction come with startCompact() + */ + public MemStoreCompactor(CompactedMemStore ms, CompactionPipeline cp, + CellComparator comparator, Configuration conf) { + + this.ms = ms; + this.cp = cp; + this.comparator = comparator; + this.conf = conf; + } + + /** + * ---------------------------------------------------------------------- + * The request to dispatch the asynchronous compaction task. + * The method returns true if compaction was successfully dispatched, or false if there + * is already an ongoing compaction (or the pipeline is empty). + */ + public boolean startCompact(Store store) throws IOException { + if (cp.isEmpty()) return false; // no compaction on empty pipeline + + if (!inCompaction.get()) { // dispatch + List<MemStoreSegmentScanner> scanners = new ArrayList<MemStoreSegmentScanner>(); + this.versionedList = // get the list of MemStoreSegments from the pipeline + cp.getVersionedList(); // the list is marked with specific version + + // create the list of scanners with maximally possible read point, meaning that + // all KVs are going to be returned by the pipeline traversing + for (MemStoreSegment mgr : this.versionedList.getMemStoreSegments()) { + scanners.add(mgr.getScanner(Long.MAX_VALUE)); + } + scanner = + new MemStoreScanner(ms, scanners, Long.MAX_VALUE, MemStoreScanner.Type.COMPACT_FORWARD); + + smallestReadPoint = store.getSmallestReadPoint(); + compactingScanner = createScanner(store); + + Runnable worker = new Worker(); + LOG.info("Starting the MemStore in-memory compaction"); + pool.execute(worker); + inCompaction.set(true); + return true; + } + return false; + } + + /*---------------------------------------------------------------------- + * The request to cancel the asynchronous compaction task + * The compaction may still happen if the request was sent too late + * Non-blocking request + */ + public void stopCompact() { + if (inCompaction.get()) isInterrupted.compareAndSet(false, true); + inCompaction.set(false); + } + + public boolean isInCompaction() { + return inCompaction.get(); + } + + /*---------------------------------------------------------------------- + * Close the scanners and clear the pointers in order to allow good + * garbage collection + */ + private void releaseResources() { + isInterrupted.set(false); + scanner.close(); + scanner = null; + compactingScanner.close(); + compactingScanner = null; + versionedList = null; + } + +
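The dispatch protocol above is the heart of the in-memory compaction: startCompact() captures a versioned view of the pipeline, a single background worker compacts that view off to the side, and (as the Worker below shows) the result is swapped back in only if nothing invalidated the version in the meantime. The following self-contained sketch isolates that pattern; Pipeline, VersionedList and the string "segments" are illustrative stand-ins, not the real CompactionPipeline API, and it uses compareAndSet for the guard, a slightly stricter variant of the get-then-set sequence above.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.atomic.AtomicBoolean;

    public class CompactionDispatchSketch {

      // Immutable view of the pipeline plus the version it was taken at
      // (stand-in for VersionedSegmentsList).
      static class VersionedList {
        final long version;
        final List<String> segments;

        VersionedList(long version, List<String> segments) {
          this.version = version;
          this.segments = segments;
        }
      }

      // Versioned pipeline (stand-in for CompactionPipeline): a swap succeeds
      // only if no flush or other change bumped the version in the meantime.
      static class Pipeline {
        private long version = 0;
        private List<String> segments = new ArrayList<String>(Arrays.asList("seg1", "seg2"));

        synchronized VersionedList getVersionedList() {
          return new VersionedList(version, new ArrayList<String>(segments));
        }

        synchronized boolean swap(VersionedList expected, String compacted) {
          if (expected.version != version) {
            return false; // the pipeline moved on; drop the stale result
          }
          segments = new ArrayList<String>(Arrays.asList(compacted));
          version++;
          return true;
        }
      }

      private final Pipeline pipeline = new Pipeline();
      private final AtomicBoolean inCompaction = new AtomicBoolean(false);
      private final ExecutorService pool = Executors.newCachedThreadPool();

      // Returns false if a compaction is already running: at most one worker at a time.
      boolean startCompact() {
        if (!inCompaction.compareAndSet(false, true)) {
          return false;
        }
        final VersionedList view = pipeline.getVersionedList();
        pool.execute(new Runnable() {
          @Override
          public void run() {
            try {
              // Placeholder for phase I, the merge done by compactSegments() below.
              String compacted = view.segments.toString();
              // Phase II: install the result only if the version still matches.
              pipeline.swap(view, compacted);
            } finally {
              inCompaction.set(false);
            }
          }
        });
        return true;
      }

      public static void main(String[] args) throws InterruptedException {
        CompactionDispatchSketch c = new CompactionDispatchSketch();
        System.out.println("dispatched: " + c.startCompact());
        Thread.sleep(100);
        c.pool.shutdown();
      }
    }

The compareAndSet closes the small window in which two callers could both observe inCompaction == false before either sets it; whether that window matters in the patch depends on how CompactedMemStore serializes its calls to startCompact().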
/*---------------------------------------------------------------------- + * The worker thread performs the compaction asynchronously. + * The solo (per compactor) thread only reads the compaction pipeline. + * There is at most one thread per memstore instance. + */ + private class Worker implements Runnable { + + @Override public void run() { + MemStoreSegment result = MemStoreSegment.Factory.instance() + .createMemStoreSegment(CellSet.Type.COMPACTED_READ_ONLY, conf, comparator, + CompactedMemStore.DEEP_OVERHEAD_PER_PIPELINE_ITEM); + // the compaction processing + KeyValue cell; + try { + // Phase I: create the compacted MemStoreSegment + compactSegments(result); + // Phase II: swap the old compaction pipeline + if (!Thread.currentThread().isInterrupted()) { + cp.swap(versionedList, result); + } + } catch (Exception e) { + Thread.currentThread().interrupt(); + return; + } finally { + stopCompact(); + releaseResources(); + } + + } + } + + /** + * Creates the scanner for compacting the pipeline. + * + * @return the scanner + */ + private StoreScanner createScanner(Store store) throws IOException { + + Scan scan = new Scan(); + scan.setMaxVersions(); // Get all available versions + + StoreScanner internalScanner = + new StoreScanner(store, store.getScanInfo(), scan, Collections.singletonList(scanner), + ScanType.COMPACT_RETAIN_DELETES, smallestReadPoint, HConstants.OLDEST_TIMESTAMP); + + return internalScanner; + } + + /** + * Creates a single MemStoreSegment using the internal store scanner, + * which in turn uses ScanQueryMatcher + */ + private void compactSegments(MemStoreSegment result) throws IOException { + + List<Cell> kvs = new ArrayList<Cell>(); + int compactionKVMax = conf.getInt( // get the limit to the size of the + HConstants.COMPACTION_KV_MAX, // groups to be returned by compactingScanner + HConstants.COMPACTION_KV_MAX_DEFAULT); + + ScannerContext scannerContext = + ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build(); + + boolean hasMore; + do { + hasMore = compactingScanner.next(kvs, scannerContext); + if (!kvs.isEmpty()) { + for (Cell c : kvs) { + // The scanner is doing all the elimination logic + // now we just copy it to the new segment + KeyValue kv = KeyValueUtil.ensureKeyValue(c); + Cell newKV = result.maybeCloneWithAllocator(kv); + result.add(newKV); + + } + kvs.clear(); + } + } while (hasMore && (!isInterrupted.get())); + + } + + // methods for tests + void toggleCompaction(boolean on) { + if (on) { + inCompaction.set(false); + } else { + inCompaction.set(true); + } + } + +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java index 40c5046..cdde746 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java @@ -165,7 +165,7 @@ class MemStoreFlusher implements FlushRequester { Region regionToFlush; if (bestFlushableRegion != null && - bestAnyRegion.getMemstoreSize() > 2 * bestFlushableRegion.getMemstoreSize()) { + bestAnyRegion.getMemstoreTotalSize() > 2 * bestFlushableRegion.getMemstoreTotalSize()) { // Even if it's not supposed to be flushed, pick a region if it's more than twice // as big as the best flushable one - otherwise when we're under pressure we make // lots of little flushes and cause lots of compactions, etc, which just makes @@ -214,6 +214,7 @@ class MemStoreFlusher
implements FlushRequester { + humanReadableInt(regionToFlush.getMemstoreSize())); flushedOne = flushRegion(regionToFlush, true, true); + Preconditions.checkState(regionToFlush.getMemstoreTotalSize() > 0); if (!flushedOne) { LOG.info("Excluding unflushable region " + regionToFlush + " - trying to find a different region to flush."); @@ -508,7 +509,8 @@ class MemStoreFlusher implements FlushRequester { lock.readLock().lock(); try { notifyFlushRequest(region, emergencyFlush); - FlushResult flushResult = region.flush(forceFlushAllStores); + boolean forceFlushInsteadOfCompaction = false; + FlushResult flushResult = region.flush(forceFlushAllStores, forceFlushInsteadOfCompaction); boolean shouldCompact = flushResult.isCompactionNeeded(); // We just want to check the size boolean shouldSplit = ((HRegion)region).checkSplit() != null; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreScanner.java new file mode 100644 index 0000000..11e287f --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreScanner.java @@ -0,0 +1,301 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.htrace.Trace; + +import java.io.IOException; +import java.util.List; +import java.util.SortedSet; + +/** + * This is the scanner for any *MemStore implementation, derived from MemStore. + * Currently, the scanner works with DefaultMemStore and CompactedMemStore. + * The MemStoreScanner combines MemStoreSegmentScanners from different MemStoreSegments and + * uses the key-value heap and the reversed key-value heap for the aggregated key-values set.
+ * It is assumed that only traversing forward or backward is used (without zigzagging in between) + */ +@InterfaceAudience.Private public class MemStoreScanner extends NonLazyKeyValueScanner { + /** + * Types of MemStoreScanner scans + */ + public static enum Type { + UNDEFINED, + COMPACT_FORWARD, + USER_SCAN_FORWARD, + USER_SCAN_BACKWARD + } + + private KeyValueHeap forwardHeap; // heap of scanners used for traversing forward + private ReversedKeyValueHeap backwardHeap; // reversed scanners heap for traversing backward + + private Type type = Type.UNDEFINED; // The type of the scan is defined by constructor + // or according to the first usage + + private long readPoint; + List<MemStoreSegmentScanner> scanners; // remember the initial version of the scanners list + private AbstractMemStore // pointer back to the relevant MemStore + backwardReferenceToMemStore; // is needed for shouldSeek() method + + /** + * Constructor. + * If UNDEFINED type for MemStoreScanner is provided, the forward heap is used as default! + * After the constructor only one heap is going to be initialized for the entire lifespan + * of the MemStoreScanner. A specific scanner can only be one-directed! + * + * @param readPoint Read point below which we can safely remove duplicate KVs + * @param type The scan type; COMPACT_FORWARD should be used for compaction + * @param ms Pointer back to the MemStore + */ + public MemStoreScanner(AbstractMemStore ms, long readPoint, Type type) throws IOException { + this(ms, ms.getListOfScanners(readPoint), readPoint, type); + } + + /* Constructor used only when the scan usage is unknown and needs to be defined according to the first move */ + public MemStoreScanner(AbstractMemStore ms, long readPt) throws IOException { + this(ms, readPt, Type.UNDEFINED); + } + + public MemStoreScanner(AbstractMemStore ms, List<MemStoreSegmentScanner> scanners, long readPoint, + Type type) throws IOException { + super(); + this.readPoint = readPoint; + this.type = type; + switch (type) { + case UNDEFINED: + case USER_SCAN_FORWARD: + case COMPACT_FORWARD: + this.forwardHeap = new KeyValueHeap(scanners, ms.getComparator()); + break; + case USER_SCAN_BACKWARD: + this.backwardHeap = new ReversedKeyValueHeap(scanners, ms.getComparator()); + break; + } + this.backwardReferenceToMemStore = ms; + this.scanners = scanners; + if (Trace.isTracing() && Trace.currentSpan() != null) { + Trace.currentSpan().addTimelineAnnotation("Creating MemStoreScanner"); + } + } + + /** + * Returns the cell from the top-most scanner without advancing the iterator. + * Backward traversal is assumed only if specified explicitly + */ + @Override public synchronized Cell peek() { + if (type == Type.USER_SCAN_BACKWARD) return backwardHeap.peek(); + return forwardHeap.peek(); + } + + /** + * Gets the next cell from the top-most scanner. Assumed forward scanning. + */ + @Override public synchronized Cell next() throws IOException { + KeyValueHeap heap = (Type.USER_SCAN_BACKWARD == type) ? backwardHeap : forwardHeap; + + for (Cell currentCell = heap.next(); // loop over till the next suitable value + currentCell != null; // take next value from the forward heap + currentCell = heap.next()) { + + // all the logic of presenting cells is inside the internal MemStoreSegmentScanners + // located inside the heap + + return currentCell; + } + return null; + } + + /** + * Set the scanner at the seek key. Assumed forward scanning. + * Must be called only once: there is no thread safety between the scanner + * and the memStore.
+ * + * @param cell seek value + * @return false if the key is null or if there is no data + */ + @Override public synchronized boolean seek(Cell cell) throws IOException { + assertForward(); + + if (cell == null) { + close(); + return false; + } + + return forwardHeap.seek(cell); + } + + /** + * Move forward on the sub-lists set previously by seek. Assumed forward scanning. + * + * @param cell seek value (should be non-null) + * @return true if there is at least one KV to read, false otherwise + */ + @Override public synchronized boolean reseek(Cell cell) throws IOException { + /* + * See HBASE-4195 & HBASE-3855 & HBASE-6591 for the background on this implementation. + * This code is executed concurrently with flush and puts, without locks. + * Two points must be known when working on this code: + * 1) It's not possible to use the 'kvTail' and 'snapshot' + * variables, as they are modified during a flush. + * 2) The ideal implementation for performance would use the sub skip list + * implicitly pointed by the iterators 'kvsetIt' and + * 'snapshotIt'. Unfortunately the Java API does not offer a method to + * get it. So we remember the last keys we iterated to and restore + * the reseeked set to at least that point. + * + * TODO: The above comment copied from the original MemStoreScanner + */ + assertForward(); + return forwardHeap.reseek(cell); + } + + /** + * MemStoreScanner returns max value as sequence id because it will + * always have the latest data among all files. + */ + @Override public synchronized long getSequenceID() { + return Long.MAX_VALUE; + } + + @Override public synchronized void close() { + + if (forwardHeap != null) { + assert ((type == Type.USER_SCAN_FORWARD) || + (type == Type.COMPACT_FORWARD) || (type == Type.UNDEFINED)); + forwardHeap.close(); + forwardHeap = null; + if (backwardHeap != null) { + backwardHeap.close(); + backwardHeap = null; + } + } else if (backwardHeap != null) { + assert (type == Type.USER_SCAN_BACKWARD); + backwardHeap.close(); + backwardHeap = null; + } + } + + /** + * Set the scanner at the seek key. Assumed backward scanning. + * + * @param cell seek value + * @return false if the key is null or if there is no data + */ + @Override public synchronized boolean backwardSeek(Cell cell) throws IOException { + initiBackwHeapIfNeeded(cell, false); + return backwardHeap.backwardSeek(cell); + } + + /** + * Assumed backward scanning. 
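+ * For illustration, a sketch of walking back over three hypothetical rows r1 < r2 < r3, + * where firstOnRow() stands in for KeyValueUtil.createFirstOnRow(): + * <pre> + *   scanner.seekToPreviousRow(firstOnRow(r3)); // lands on the first cell of r2 + *   scanner.seekToPreviousRow(scanner.peek()); // lands on the first cell of r1 + * </pre>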
+ * + * @param cell seek value + * @return false if the key is null or if there is no data + */ + @Override public synchronized boolean seekToPreviousRow(Cell cell) throws IOException { + initiBackwHeapIfNeeded(cell, false); + if (backwardHeap.peek() == null) restartBackwHeap(cell); + return backwardHeap.seekToPreviousRow(cell); + } + + @Override public synchronized boolean seekToLastRow() throws IOException { + // TODO: ReversedKeyValueHeap does not implement seekToLastRow(), although the + // original (internal) MemStoreScanner did. For now we rebuild the backward heap + // positioned at the last row instead; consider extending ReversedKeyValueHeap. + return initiBackwHeapIfNeeded(KeyValue.LOWESTKEY, true); + //return backwardHeap.seekToLastRow(); + } + + /** + * Check if this memstore may contain the required keys + * + * @param scan + * @return False if the key definitely does not exist in this Memstore + */ + @Override public synchronized boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, + long oldestUnexpiredTS) { + boolean result = false; + if (type == Type.COMPACT_FORWARD) return true; + + for (MemStoreSegmentScanner sc : scanners) { + result |= sc.shouldSeek(scan, oldestUnexpiredTS); + } + return result; + } + + /****************** Private methods ******************/ + /** + * Rebuilds the exhausted backward heap by rerunning seekToPreviousRow() + * on each scanner + */ + private boolean restartBackwHeap(Cell cell) throws IOException { + boolean res = false; + for (MemStoreSegmentScanner scan : scanners) + res |= scan.seekToPreviousRow(cell); + this.backwardHeap = + new ReversedKeyValueHeap(scanners, backwardReferenceToMemStore.getComparator()); + return res; + } + + /** + * Lazily initializes the backward heap on the first backward traversal, + * releasing the forward heap if one was built by default + */ + private boolean initiBackwHeapIfNeeded(Cell cell, boolean toLast) throws IOException { + boolean res = false; + if (toLast && (type != Type.UNDEFINED)) + throw new IllegalStateException("Wrong usage of initiBackwHeapIfNeeded in parameters"); + if (type == Type.UNDEFINED) { + // In case we started from peek, release the forward heap + // and build backward. Set the correct type.
Thus this turn + // can happen only once + if ((backwardHeap == null) && (forwardHeap != null)) { + forwardHeap.close(); + forwardHeap = null; + // before building the heap seek for the relevant key on the scanners, + // for the heap to be built from the scanners correctly + for (MemStoreSegmentScanner scan : scanners) + if (toLast) res |= scan.seekToLastRow(); + else res |= scan.backwardSeek(cell); + this.backwardHeap = + new ReversedKeyValueHeap(scanners, backwardReferenceToMemStore.getComparator()); + type = Type.USER_SCAN_BACKWARD; + } + } + + if (type == Type.USER_SCAN_FORWARD) + throw new IllegalStateException("Traversing backward with forward scan"); + return res; + } + + /** + * Checks whether the type of the scan suits the assumption of moving forward + */ + private void assertForward() throws IllegalStateException { + if (type == Type.UNDEFINED) type = Type.USER_SCAN_FORWARD; + + if (type == Type.USER_SCAN_BACKWARD) + throw new IllegalStateException("Traversing forward with backward scan"); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSegment.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSegment.java new file mode 100644 index 0000000..036e2d8 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSegment.java @@ -0,0 +1,361 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.ReflectionUtils; + +import java.util.Iterator; +import java.util.SortedSet; +import java.util.concurrent.atomic.AtomicLong; + +/** + * This is an abstraction of a cell set bucket maintained in a memstore, e.g., the active + * cell set or a snapshot of it. + * It mainly encapsulates the kv-set and its respective memory allocation buffers (MSLAB). + * This class facilitates the management of the compaction pipeline and the shifts of these buckets + * from active set to snapshot set in the default implementation. 
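+ * + * A rough usage sketch; the set type, configuration, comparator, initial size and + * read point here are assumptions of the example rather than values from this patch: + * <pre> + *   MemStoreSegment segment = MemStoreSegment.Factory.instance() + *       .createMemStoreSegment(type, conf, comparator, initialSize); + *   long delta = segment.add(cell); // returns the change in the segment's heap size + *   MemStoreSegmentScanner scanner = segment.getScanner(readPoint); + * </pre>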
+ */ +@InterfaceAudience.Private +class MemStoreSegment { + + static final String USEMSLAB_KEY = "hbase.hregion.memstore.mslab.enabled"; + static final boolean USEMSLAB_DEFAULT = true; + static final String MSLAB_CLASS_NAME = "hbase.regionserver.mslab.class"; + + private volatile CellSet cellSet; + private volatile MemStoreLAB memStoreLAB; + private final CellComparator comparator; + private TimeRangeTracker timeRangeTracker; + private final AtomicLong size; + private volatile boolean tagsPresent; + + // private c-tors. Instantiate objects only using factory + private MemStoreSegment(CellSet cellSet, MemStoreLAB memStoreLAB, long size, + CellComparator comparator) { + this.cellSet = cellSet; + this.memStoreLAB = memStoreLAB; + this.comparator = comparator; + this.timeRangeTracker = new TimeRangeTracker(); + this.size = new AtomicLong(size); + this.tagsPresent = false; + } + + private MemStoreSegment(CellSet cellSet, long size, CellComparator comparator) { + this(cellSet, null, size, comparator); + } + + public MemStoreSegmentScanner getScanner(long readPoint) { + return new MemStoreSegmentScanner(this, readPoint); + } + + public boolean isEmpty() { + return getCellSet().isEmpty(); + } + + public int getCellsCount() { + return getCellSet().size(); + } + + public long add(Cell e) { + boolean succ = getCellSet().add(e); + long s = AbstractMemStore.heapSizeChange(e, succ); + updateMetaInfo(e, s); + // In no tags case this NoTagsKeyValue.getTagsLength() is a cheap call. + // When we use ACL CP or Visibility CP which deals with Tags during + // mutation, the TagRewriteCell.getTagsLength() is a cheaper call. We do not + // parse the byte[] to identify the tags length. + if(e.getTagsLength() > 0) { + tagsPresent = true; + } + return s; + } + + public boolean remove(Cell e) { + return getCellSet().remove(e); + } + + public Cell get(Cell cell) { + return getCellSet().get(cell); + } + + public Cell last() { + return getCellSet().last(); + } + + public Iterator iterator() { + return getCellSet().iterator(); + } + + public SortedSet headSet(KeyValue firstKeyOnRow) { + return getCellSet().headSet(firstKeyOnRow); + } + + public SortedSet tailSet(Cell firstCell) { + return getCellSet().tailSet(firstCell); + } + + public void close() { + MemStoreLAB mslab = getMemStoreLAB(); + if(mslab != null ) { + mslab.close(); + } + // do not set MSLab to null as scanners may still be reading the data here and need to decrease + // the counter when they finish + } + + public Cell maybeCloneWithAllocator(Cell cell) { + if (getMemStoreLAB() == null) { + return cell; + } + + int len = KeyValueUtil.length(cell); + ByteRange alloc = getMemStoreLAB().allocateBytes(len); + if (alloc == null) { + // The allocation was too large, allocator decided + // not to do anything with it. 
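+ // (This typically happens when the cell is larger than the maximal MSLAB allocation; + // the original heap-backed cell is then referenced directly instead of a chunk copy.)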
+ return cell; + } + assert alloc.getBytes() != null; + KeyValueUtil.appendToByteArray(cell, alloc.getBytes(), alloc.getOffset()); + KeyValue newKv = new KeyValue(alloc.getBytes(), alloc.getOffset(), len); + newKv.setSequenceId(cell.getSequenceId()); + return newKv; + } + + public void incScannerCount() { + if(getMemStoreLAB() != null) { + getMemStoreLAB().incScannerCount(); + } + } + + public void decScannerCount() { + if(getMemStoreLAB() != null) { + getMemStoreLAB().decScannerCount(); + } + } + + public long rollback(Cell cell) { + Cell found = get(cell); + if (found != null && found.getSequenceId() == cell.getSequenceId()) { + long sz = AbstractMemStore.heapSizeChange(cell, true); + remove(cell); + size.addAndGet(-sz); + return sz; + } + return 0; + } + + public void updateMetaInfo(Cell toAdd, long s) { + getTimeRangeTracker().includeTimestamp(toAdd); + size.addAndGet(s); + } + + public boolean shouldSeek(Scan scan, long oldestUnexpiredTS) { + return (getTimeRangeTracker().includesTimeRange(scan.getTimeRange()) + && (getTimeRangeTracker().getMaximumTimestamp() >= + oldestUnexpiredTS)); + } + + /* + * @param set + * @param state Accumulates deletes and candidates. + */ +// public void getRowKeyAtOrBefore(final GetClosestRowBeforeTracker state) { +// if (isEmpty()) { +// return; +// } +// if (!walkForwardInSingleRow(state.getTargetKey(), state)) { +// // Found nothing in row. Try backing up. +// getRowKeyBefore(state); +// } +// } + + // methods for cell set scanner + public int compare(Cell left, Cell right) { + return getComparator().compare(left, right); + } + + public int compareRows(Cell left, Cell right) { + return getComparator().compareRows(left, right); + } + + + public void setSize(long size) { + this.size.set(size); + } + + public CellSet getCellSet() { + return cellSet; + } + + public TimeRangeTracker getTimeRangeTracker() { + return timeRangeTracker; + } + + public long getSize() { + return size.get(); + } + + public void incSize(long delta) { + size.addAndGet(delta); + } + + public boolean isTagsPresent() { + return tagsPresent; + } + + private MemStoreLAB getMemStoreLAB() { + return memStoreLAB; + } + + private CellComparator getComparator() { + return comparator; + } + + // methods for tests + Cell first() { + return this.getCellSet().first(); + } + + /* + * Walk forward in a row from firstOnRow. Presumption is that + * we have been passed the first possible key on a row. As we walk forward + * we accumulate deletes until we hit a candidate on the row at which point + * we return. + * @param set + * @param firstOnRow First possible key on this row. + * @param state + * @return True if we found a candidate walking this row. + */ +// private boolean walkForwardInSingleRow(final Cell firstOnRow, final GetClosestRowBeforeTracker state) { +// boolean foundCandidate = false; +// SortedSet tail = getCellSet().tailSet(firstOnRow); +// if (tail.isEmpty()) return foundCandidate; +// for (Iterator i = tail.iterator(); i.hasNext();) { +// Cell kv = i.next(); +// // Did we go beyond the target row? If so break. +// if (state.isTooFar(kv, firstOnRow)) break; +// if (state.isExpired(kv)) { +// i.remove(); +// continue; +// } +// // If we added something, this row is a contender. break. +// if (state.handle(kv)) { +// foundCandidate = true; +// break; +// } +// } +// return foundCandidate; +// } + + /* + * Walk backwards through the passed set a row at a time until we run out of + * set or until we get a candidate. 
+ * @param set + * @param state + */ +// private void getRowKeyBefore(final GetClosestRowBeforeTracker state) { +// KeyValue firstOnRow = state.getTargetKey(); +// for (Cell p = memberOfPreviousRow(state, firstOnRow); +// p != null; p = memberOfPreviousRow(state, firstOnRow)) { +// // Make sure we don't fall out of our table. +// if (!state.isTargetTable(p)) break; +// // Stop looking if we've exited the better candidate range. +// if (!state.isBetterCandidate(p)) break; +// // Make into firstOnRow +// firstOnRow = new KeyValue(p.getRowArray(), p.getRowOffset(), p.getRowLength(), +// HConstants.LATEST_TIMESTAMP); +// // If we find something, break; +// if (walkForwardInSingleRow(firstOnRow, state)) break; +// } +// } + + /* + * @param set Set to walk back in. Pass a first in row or we'll return + * same row (loop). + * @param state Utility and context. + * @param firstOnRow First item on the row after the one we want to find a + * member in. + * @return Null or member of row previous to firstOnRow + */ +// private Cell memberOfPreviousRow(final GetClosestRowBeforeTracker state, +// final KeyValue firstOnRow) { +// NavigableSet head = getCellSet().headSet(firstOnRow, false); +// if (head.isEmpty()) return null; +// for (Iterator i = head.descendingIterator(); i.hasNext();) { +// Cell found = i.next(); +// if (state.isExpired(found)) { +// i.remove(); +// continue; +// } +// return found; +// } +// return null; +// } + + /** + * A singleton cell set manager factory. + * Maps each cell set type to a specific implementation + */ + static class Factory { + + private Factory() {} + private static Factory instance = new Factory(); + public static Factory instance() { return instance; } + + public MemStoreSegment createMemStoreSegment(CellSet.Type type, final Configuration conf, + final CellComparator comparator, long size) { + MemStoreLAB memStoreLAB = null; + if (conf.getBoolean(USEMSLAB_KEY, USEMSLAB_DEFAULT)) { + String className = conf.get(MSLAB_CLASS_NAME, HeapMemStoreLAB.class.getName()); + memStoreLAB = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class }, new Object[] { conf }); + //memStoreLAB = new MemStoreLAB(conf, MemStoreChunkPool.getPool(conf)); + } + return createMemStoreSegment(type, comparator, memStoreLAB, size); + } + + public MemStoreSegment createMemStoreSegment(CellSet.Type type, + CellComparator comparator, + long size) { + return createMemStoreSegment(type, comparator, null, size); + } + + public MemStoreSegment createMemStoreSegment(CellSet.Type type, + CellComparator comparator, + MemStoreLAB memStoreLAB, long size) { + return generateMemStoreSegmentByType(type, comparator, memStoreLAB, size); + } + + private MemStoreSegment generateMemStoreSegmentByType(CellSet.Type type, + CellComparator comparator, MemStoreLAB memStoreLAB, long size) { + CellSet set = new CellSet(type, comparator); + return new MemStoreSegment(set, memStoreLAB, size, comparator); + } + + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSegmentScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSegmentScanner.java new file mode 100644 index 0000000..fc5e8dc --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSegmentScanner.java @@ -0,0 +1,421 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Scan; + +import java.io.IOException; +import java.util.Iterator; +import java.util.SortedSet; + +/** + * A scanner of a single cells segment {@link MemStoreSegment}. + */ +@InterfaceAudience.Private +class MemStoreSegmentScanner implements KeyValueScanner { + + private final MemStoreSegment segment; // the observed structure + private long readPoint; // the highest relevant MVCC + private Iterator iter; // the current iterator that can be reinitialized by + // seek(), backwardSeek(), or reseek() + private Cell current = null; // the pre-calculated cell to be returned by peek() + // or next() + // A flag represents whether could stop skipping KeyValues for MVCC + // if have encountered the next row. Only used for reversed scan + private boolean stopSkippingKVsIfNextRow = false; + // last iterated KVs by seek (to restore the iterator state after reseek) + private Cell last = null; + private long sequenceID = Long.MAX_VALUE; + + /** + * --------------------------------------------------------- + * C-tor + */ + public MemStoreSegmentScanner(MemStoreSegment segment, long readPoint) { + super(); + this.segment = segment; + this.readPoint = readPoint; + iter = segment.iterator(); + // the initialization of the current is required for working with heap of SegmentScanners + current = getNext(); + //increase the reference count so the underlying structure will not be de-allocated + this.segment.incScannerCount(); + } + + + /** + * --------------------------------------------------------- + * Look at the next Cell in this scanner, but do not iterate the scanner + * + * @return the currently observed Cell + */ + @Override + public Cell peek() { // sanity check, the current should be always valid + if (current!=null && current.getSequenceId() > readPoint) { + assert (false); // sanity check, the current should be always valid + } + + return current; + } + + + /** + * --------------------------------------------------------- + * Return the next Cell in this scanner, iterating the scanner + * + * @return the next Cell or null if end of scanner + */ + @Override + public Cell next() throws IOException { + Cell oldCurrent = current; + current = getNext(); // update the currently observed Cell + return oldCurrent; + } + + + /** + * --------------------------------------------------------- + * Seek the scanner at or after the specified KeyValue. 
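+ * A sketch of the seek/reseek contract, with hypothetical cells kvA and kvB where + * kvA precedes kvB: + * <pre> + *   scanner.seek(kvA); // restart the iterator at the first cell at or after kvA + *   Cell top = scanner.peek(); // inspect the position without advancing + *   scanner.reseek(kvB); // may only move forward from the current position + * </pre>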
+ * + * @param cell seek value + * @return true if scanner has values left, false if end of scanner + */ + @Override + public boolean seek(Cell cell) throws IOException { + // restart the iterator from new key + iter = segment.tailSet(cell).iterator(); + last = null; // last is going to be reinitialized in the next getNext() call + current = getNext(); + return (current != null); + } + + + /** + * --------------------------------------------------------- + * Reseek the scanner at or after the specified KeyValue. + * This method is guaranteed to seek at or after the required key only if the + * key comes after the current position of the scanner. Should not be used + * to seek to a key which may come before the current position. + * + * @param cell seek value (should be non-null) + * @return true if scanner has values left, false if end of scanner + */ + @Override + public boolean reseek(Cell cell) throws IOException { + + /* + * The ideal implementation for performance would use the sub skip list implicitly + * pointed by the iterator. Unfortunately the Java API does not offer a method to + * get it. So we remember the last keys we iterated to and restore + * the reseeked set to at least that point. + */ + iter = segment.tailSet(getHighest(cell, last)).iterator(); + current = getNext(); + return (current != null); + } + + + /** + * --------------------------------------------------------- + * Get the sequence id associated with this KeyValueScanner. This is required + * for comparing multiple files (or memstore segments) scanners to find out + * which one has the latest data. + * + */ + @Override + public long getSequenceID() { + return sequenceID; + } + + public void setSequenceID(long x) { + sequenceID = x; + } + + + /** + * --------------------------------------------------------- + * Close the KeyValue scanner. + */ + @Override + public void close() { + this.segment.decScannerCount(); + } + + + /** + * --------------------------------------------------------- + * Allows to filter out scanners (both StoreFile and memstore) that we don't + * want to use based on criteria such as Bloom filters and timestamp ranges. + * + * @param scan the scan that we are selecting scanners for + * @param columns the set of columns in the current column family, or null if + * not specified by the scan + * @param oldestUnexpiredTS the oldest timestamp we are interested in for + * this query, based on TTL + * @return true if the scanner should be included in the query + *

+ * This functionality should be resolved in the higher level, which is the + * MemStoreScanner; this scanner therefore returns true by default. It doesn't throw + * an IllegalStateException so as not to change the contract of the + * overridden method + */ + @Override + public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, + long oldestUnexpiredTS) { + return true; + } + + + /** + * --------------------------------------------------------- + * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only + * does a seek operation after checking that it is really necessary for the + * row/column combination specified by the kv parameter. This function was + * added to avoid unnecessary disk seeks by checking row-column Bloom filters + * before a seek on multi-column get/scan queries, and to optimize by looking + * up more recent files first. + *

+ * This scanner is working solely on the in-memory MemStore therefore this + * interface is not relevant. + * + * @param c + * @param forward do a forward-only "reseek" instead of a random-access seek + * @param useBloom whether to enable multi-column Bloom filter optimization + */ + @Override + public boolean requestSeek(Cell c, boolean forward, boolean useBloom) + throws IOException { + + throw new IllegalStateException( + "requestSeek cannot be called on MemStoreSegmentScanner"); + } + + + /** + * --------------------------------------------------------- + * We optimize our store scanners by checking the most recent store file + * first, so we sometimes pretend we have done a seek but delay it until the + * store scanner bubbles up to the top of the key-value heap. This method is + * then used to ensure the top store file scanner has done a seek operation. + *

+ * This scanner is working solely on the in-memory MemStore and doesn't work on + * store files, MemStoreSegmentScanner always does the seek, therefore always returning true. + */ + @Override + public boolean realSeekDone() { + return true; + } + + + /** + * --------------------------------------------------------- + * Does the real seek operation in case it was skipped by + * seekToRowCol(KeyValue, boolean). Note that this function should + * be never called on scanners that always do real seek operations (i.e. most + * of the scanners and also this one). The easiest way to achieve this is to call + * {@link #realSeekDone()} first. + */ + @Override + public void enforceSeek() throws IOException { + throw new IllegalStateException( + "enforceSeek cannot be called on MemStoreSegmentScanner"); + } + + + /** + * --------------------------------------------------------- + * + * @return true if this is a file scanner. Otherwise a memory scanner is + * assumed. + */ + @Override + public boolean isFileScanner() { + return false; + } + + + /** + * --------------------------------------------------------- + * Seek the scanner at or before the row of specified Cell, it firstly + * tries to seek the scanner at or after the specified Cell, return if + * peek KeyValue of scanner has the same row with specified Cell, + * otherwise seek the scanner at the first Cell of the row which is the + * previous row of specified KeyValue + * + * @param key seek KeyValue + * @return true if the scanner is at the valid KeyValue, false if such + * KeyValue does not exist + */ + @Override + public boolean backwardSeek(Cell key) throws IOException { + seek(key); // seek forward then go backward + if (peek() == null || segment.compareRows(peek(), key) > 0) { + return seekToPreviousRow(key); + } + return true; + } + + + /** + * --------------------------------------------------------- + * Seek the scanner at the first Cell of the row which is the previous row + * of specified key + * + * @param cell seek value + * @return true if the scanner at the first valid Cell of previous row, + * false if not existing such Cell + */ + @Override + public boolean seekToPreviousRow(Cell cell) throws IOException { + + KeyValue firstKeyOnRow = // find a previous cell + KeyValueUtil.createFirstOnRow(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); + SortedSet cellHead = // here the search is hidden, reset the iterator + segment.headSet(firstKeyOnRow); + Cell lastCellBeforeRow = cellHead.isEmpty() ? 
null : cellHead.last(); + + if (lastCellBeforeRow == null) { // end of recursion + current = null; + return false; + } + + KeyValue firstKeyOnPreviousRow = // find a previous row + KeyValueUtil.createFirstOnRow(lastCellBeforeRow.getRowArray(), + lastCellBeforeRow.getRowOffset(), lastCellBeforeRow.getRowLength()); + + stopSkippingKVsIfNextRow = true; + // seek in order to update the iterator and current + seek(firstKeyOnPreviousRow); + stopSkippingKVsIfNextRow = false; + + // if nothing found or we searched beyond the needed, take one more step backward + if (peek() == null || segment.compareRows(peek(), firstKeyOnPreviousRow) > 0) { + return seekToPreviousRow(lastCellBeforeRow); + } + return true; + } + + + /** + * --------------------------------------------------------- + * Seek the scanner at the first KeyValue of last row + * + * @return true if scanner has values left, false if the underlying data is empty + * @throws java.io.IOException + */ + @Override + public boolean seekToLastRow() throws IOException { + Cell higherCell = segment.isEmpty() ? null : segment.last(); + if (higherCell == null) { + return false; + } + + KeyValue firstCellOnLastRow = KeyValueUtil.createFirstOnRow(higherCell.getRowArray(), + higherCell.getRowOffset(), higherCell.getRowLength()); + + if (seek(firstCellOnLastRow)) { + return true; + } else { + return seekToPreviousRow(higherCell); + } + } + + + /** + * --------------------------------------------------------- + * + * @return the next key in the index (the key to seek to the next block) + * if known, or null otherwise + *
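+ * (For an HFile scanner this would be the first key of the next block; the memstore + * keeps no block index, so there is nothing to report here.)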

+ * Not relevant for in-memory scanner + */ + @Override + public Cell getNextIndexedKey() { + return null; + } + + /** + * Called after a batch of rows scanned (RPC) and set to be returned to client. Any in between + * cleanup can be done here. Nothing to be done for MemStoreSegmentScanner. + */ + @Override + public void shipped() throws IOException { + // do nothing + } + + public boolean shouldSeek(Scan scan, long oldestUnexpiredTS) { + return segment.shouldSeek(scan,oldestUnexpiredTS); + } + + +/********************* Private Methods **********************/ + + /** + * --------------------------------------------------------- + * Private internal method for iterating over the segment, + * skipping the cells with irrelevant MVCC + */ + private Cell getNext() { + Cell startKV = current; + Cell next = null; + + try { + while (iter.hasNext()) { + next = iter.next(); + if (next.getSequenceId() <= this.readPoint) { + return next; // skip irrelevant versions + } + if (stopSkippingKVsIfNextRow && // for backwardSeek() stay in the + startKV != null && // boundaries of a single row + segment.compareRows(next, startKV) > 0) { + return null; + } + } // end of while + + return null; // nothing found + } finally { + if (next != null) { + // in all cases, remember the last KV we iterated to, needed for reseek() + last = next; + } + } + } + + + /** + * --------------------------------------------------------- + * Private internal method that returns the higher of the two key values, or null + * if they are both null + */ + private Cell getHighest(Cell first, Cell second) { + if (first == null && second == null) { + return null; + } + if (first != null && second != null) { + int compare = segment.compare(first, second); + return (compare > 0 ? first : second); + } + return (first != null ? first : second); + } + +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSnapshot.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSnapshot.java index be853c5..0420ed7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSnapshot.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreSnapshot.java @@ -17,7 +17,9 @@ */ package org.apache.hadoop.hbase.regionserver; +import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.CollectionBackedScanner; /** * Holds details of the snapshot taken on a MemStore. 
Details include the snapshot's identifier, @@ -34,14 +36,13 @@ public class MemStoreSnapshot { private final KeyValueScanner scanner; private final boolean tagsPresent; - public MemStoreSnapshot(long id, int cellsCount, long size, TimeRangeTracker timeRangeTracker, - KeyValueScanner scanner, boolean tagsPresent) { + public MemStoreSnapshot(long id, MemStoreSegment snapshot, CellComparator comparator) { this.id = id; - this.cellsCount = cellsCount; - this.size = size; - this.timeRangeTracker = timeRangeTracker; - this.scanner = scanner; - this.tagsPresent = tagsPresent; + this.cellsCount = snapshot.getCellsCount(); + this.size = snapshot.getSize(); + this.timeRangeTracker = snapshot.getTimeRangeTracker(); + this.scanner = new CollectionBackedScanner(snapshot.getCellSet(),comparator); + this.tagsPresent = snapshot.isTagsPresent(); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 65cedee..955b303 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -18,23 +18,7 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.net.InetSocketAddress; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeSet; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; - +import com.google.protobuf.RpcController; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -181,10 +165,26 @@ import org.apache.zookeeper.KeeperException; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ByteString; import com.google.protobuf.Message; -import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; import com.google.protobuf.TextFormat; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.InetSocketAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + /** * Implements the regionserver RPC services. 
*/ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java index 6d87057..ca2256b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java @@ -190,6 +190,9 @@ public interface Region extends ConfigurationObserver { /** @return memstore size for this region, in bytes */ long getMemstoreSize(); + /** @return total memstore size for this region, in bytes, including additional memory + * held e.g. in compaction pipelines */ + public long getMemstoreTotalSize(); + /** @return the number of mutations processed bypassing the WAL */ long getNumMutationsWithoutWAL(); @@ -646,6 +649,30 @@ public interface Region extends ConfigurationObserver { FlushResult flush(boolean force) throws IOException; /** + * Flush the cache. + * + *

<p>When this method is called the cache will be flushed unless: + * <ol> + *   <li>the cache is empty</li> + *   <li>the region is closed.</li> + *   <li>a flush is already in progress</li> + *   <li>writes are disabled</li> + * </ol>
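+ * + * <p>As a usage sketch, the call sites added in this patch request a forced flush of all + * stores without forcing the compacting memstores to disk: + * <pre> + *   region.flush(true, false); // force == true, forceFlushInsteadOfCompaction == false + * </pre> + *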

This method may block for some time, so it should not be called from a + * time-sensitive thread. + * @param force whether we want to force a flush of all stores + * @param forceFlushInsteadOfCompaction whether to flush the compacting memstores as well + * @return FlushResult indicating whether the flush was successful or not and if + * the region needs compacting + * + * @throws IOException general io exceptions + * @throws DroppedSnapshotException Thrown when abort is required + * because a snapshot was not properly persisted. + */ + public FlushResult flush(boolean force, boolean forceFlushInsteadOfCompaction) throws IOException; + + /** * Synchronously compact all stores in the region. *

This operation could block for a long time, so don't call it from a * time-sensitive thread. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAccounting.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAccounting.java index 879b573..303290a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAccounting.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAccounting.java @@ -18,13 +18,13 @@ */ package org.apache.hadoop.hbase.regionserver; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.Bytes; + import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicLong; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.Bytes; - /** * RegionServerAccounting keeps record of some basic real time information about * the Region Server. Currently, it only keeps record of the global memstore size. @@ -33,7 +33,8 @@ import org.apache.hadoop.hbase.util.Bytes; public class RegionServerAccounting { private final AtomicLong atomicGlobalMemstoreSize = new AtomicLong(0); - + private final AtomicLong atomicGlobalMemstoreAdditionalSize = new AtomicLong(0); + // Store the edits size during replaying WAL. Use this to roll back the // global memstore size once a region opening failed. private final ConcurrentMap<byte[], AtomicLong> replayEditsPerRegion = @@ -54,7 +55,11 @@ public class RegionServerAccounting { public long addAndGetGlobalMemstoreSize(long memStoreSize) { return atomicGlobalMemstoreSize.addAndGet(memStoreSize); } - + + public long addAndGetGlobalMemstoreAdditionalSize(long size) { + return atomicGlobalMemstoreAdditionalSize.addAndGet(size); + } + /*** * Add memStoreSize to replayEditsPerRegion. * diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java index edc166e..a16f062 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -420,6 +420,11 @@ public interface Store extends HeapSize, StoreConfigInformation, PropagatingConf */ double getCompactionPressure(); + // Turns on the force-flush flag, to make sure data is flushed to disk + void setForceFlush(); + // Checks whether an in-memory (memstore) compaction is in progress + boolean isMemstoreCompaction(); + /** * Replaces the store files that the store has with the given files. Mainly used by * secondary region replicas to keep up to date with diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/VersionedSegmentsList.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/VersionedSegmentsList.java new file mode 100644 index 0000000..4501545 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/VersionedSegmentsList.java @@ -0,0 +1,54 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +import java.util.LinkedList; + +/** + * A list of segment managers coupled with the version of the memstore (version at the time it was + * created). + * This structure helps to guarantee that the compaction pipeline is updated in a + * consistent (atomic) way after the compaction finishes. + * Specifically, swapping some of the elements in a compaction pipeline with a new compacted + * element is permitted only if the pipeline version is the same as the version attached to the + * elements. + * + */ +@InterfaceAudience.Private +public class VersionedSegmentsList { + + private final LinkedList<MemStoreSegment> memStoreSegments; + private final long version; + + public VersionedSegmentsList( + LinkedList<MemStoreSegment> memStoreSegments, long version) { + this.memStoreSegments = memStoreSegments; + this.version = version; + } + + public LinkedList<MemStoreSegment> getMemStoreSegments() { + return memStoreSegments; + } + + public long getVersion() { + return version; + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestGlobalMemStoreSize.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestGlobalMemStoreSize.java index ab0e6b4..4cc9bb5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestGlobalMemStoreSize.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestGlobalMemStoreSize.java @@ -18,13 +18,6 @@ */ package org.apache.hadoop.hbase; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -42,6 +35,13 @@ import org.apache.hadoop.hbase.util.Threads; import org.junit.Test; import org.junit.experimental.categories.Category; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + /** * Test HBASE-3694 whether the GlobalMemStoreSize is the same as the summary * of all the online region's MemStoreSize @@ -141,7 +141,7 @@ public class TestGlobalMemStoreSize { } /** - * Flush and log stats on flush + * Flush (force) and log stats on flush * @param r * @param server * @throws IOException diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java index 7d644bd..c192f7d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestIOFencing.java @@ -17,15 +17,7 @@ */ package org.apache.hadoop.hbase; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicLong; - +import com.google.common.collect.Lists; import
org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -55,7 +47,14 @@ import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.junit.Test; import org.junit.experimental.categories.Category; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicLong; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Test for the case where a regionserver going down has enough cycles to do damage to regions @@ -208,6 +207,14 @@ public class TestIOFencing { } super.completeCompaction(compactedFiles); } + + @Override public void setForceFlush() { + return; + } + + @Override public boolean isMemstoreCompaction() { + return false; + } } private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java index f6ade32..b7b3af2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java @@ -19,22 +19,6 @@ package org.apache.hadoop.hbase.io; -import java.io.IOException; -import java.lang.management.ManagementFactory; -import java.lang.management.RuntimeMXBean; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Map; -import java.util.TreeMap; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.CopyOnWriteArraySet; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.KeyValue; @@ -42,9 +26,9 @@ import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; -import org.apache.hadoop.hbase.io.hfile.LruCachedBlock; import org.apache.hadoop.hbase.io.hfile.LruBlockCache; -import org.apache.hadoop.hbase.regionserver.CellSkipListSet; +import org.apache.hadoop.hbase.io.hfile.LruCachedBlock; +import org.apache.hadoop.hbase.regionserver.CellSet; import org.apache.hadoop.hbase.regionserver.DefaultMemStore; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HStore; @@ -56,6 +40,22 @@ import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CopyOnWriteArraySet; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import 
java.util.concurrent.locks.ReentrantReadWriteLock; + import static org.junit.Assert.assertEquals; /** @@ -237,8 +237,8 @@ public class TestHeapSize { assertEquals(expected, actual); } - // CellSkipListSet - cl = CellSkipListSet.class; + // CellSet + cl = CellSet.class; expected = ClassSize.estimateBase(cl, false); actual = ClassSize.CELL_SKIPLIST_SET; if (expected != actual) { @@ -305,15 +305,16 @@ public class TestHeapSize { // DefaultMemStore Deep Overhead actual = DefaultMemStore.DEEP_OVERHEAD; expected = ClassSize.estimateBase(cl, false); - expected += ClassSize.estimateBase(AtomicLong.class, false); - expected += (2 * ClassSize.estimateBase(CellSkipListSet.class, false)); + expected += (2 * ClassSize.estimateBase(AtomicLong.class, false)); + expected += (2 * ClassSize.estimateBase(CellSet.class, false)); expected += (2 * ClassSize.estimateBase(ConcurrentSkipListMap.class, false)); expected += (2 * ClassSize.estimateBase(TimeRangeTracker.class, false)); if(expected != actual) { ClassSize.estimateBase(cl, true); ClassSize.estimateBase(AtomicLong.class, true); - ClassSize.estimateBase(CellSkipListSet.class, true); - ClassSize.estimateBase(CellSkipListSet.class, true); + ClassSize.estimateBase(AtomicLong.class, true); + ClassSize.estimateBase(CellSet.class, true); + ClassSize.estimateBase(CellSet.class, true); ClassSize.estimateBase(ConcurrentSkipListMap.class, true); ClassSize.estimateBase(ConcurrentSkipListMap.class, true); ClassSize.estimateBase(TimeRangeTracker.class, true); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellSkipListSet.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellSkipListSet.java index 684839d..1cd3803 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellSkipListSet.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellSkipListSet.java @@ -34,8 +34,8 @@ import org.junit.experimental.categories.Category; @Category({RegionServerTests.class, SmallTests.class}) public class TestCellSkipListSet extends TestCase { - private final CellSkipListSet csls = - new CellSkipListSet(CellComparator.COMPARATOR); + private final CellSet csls = + new CellSet(CellComparator.COMPARATOR); protected void setUp() throws Exception { super.setUp(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactedMemStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactedMemStore.java new file mode 100644 index 0000000..ff96fdf --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactedMemStore.java @@ -0,0 +1,1416 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import com.google.common.base.Joiner; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import junit.framework.TestCase; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdge; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Threads; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.SortedSet; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +/** + * compacted memstore test case + */ +@Category(MediumTests.class) +public class TestCompactedMemStore extends TestCase { + private static final Log LOG = LogFactory.getLog(TestCompactedMemStore.class); + private static final int ROW_COUNT = 10; + private static final int QUALIFIER_COUNT = ROW_COUNT; + private static final byte[] FAMILY = Bytes.toBytes("column"); + private static MemStoreChunkPool chunkPool; + private CompactedMemStore cms; + private HRegion region; + private HStore store; + private MultiVersionConsistencyControl mvcc; + private AtomicLong startSeqNum = new AtomicLong(0); + + ////////////////////////////////////////////////////////////////////////////// + // Helpers + ////////////////////////////////////////////////////////////////////////////// + private static byte[] makeQualifier(final int i1, final int i2) { + return Bytes.toBytes(Integer.toString(i1) + ";" + + Integer.toString(i2)); + } + + // private KeyValue getDeleteKV(byte [] row) { + // return new KeyValue(row, Bytes.toBytes("test_col"), null, + // HConstants.LATEST_TIMESTAMP, KeyValue.Type.Delete, null); + // } + // + // private KeyValue getKV(byte [] row, byte [] value) { + // return new KeyValue(row, Bytes.toBytes("test_col"), null, + // HConstants.LATEST_TIMESTAMP, value); + // } + private static void addRows(int count, final CompactedMemStore mem) { + long nanos = System.nanoTime(); + + for (int i = 0; i < count; i++) { + if (i % 1000 == 0) { + + System.out.println(i + " Took for 1k usec: " + (System.nanoTime() - nanos) / 1000); + nanos = System.nanoTime(); + } + long timestamp = System.currentTimeMillis(); + + for (int ii = 0; ii < QUALIFIER_COUNT; ii++) { + byte[] row = Bytes.toBytes(i); + byte[] qf = makeQualifier(i, ii); + mem.add(new KeyValue(row, FAMILY, qf, timestamp, qf)); + } + } + } + + static void doScan(AbstractMemStore ms, int iteration) throws IOException { + long nanos = System.nanoTime(); + KeyValueScanner s = 
ms.getScanners(0).get(0); + s.seek(KeyValueUtil.createFirstOnRow(new byte[] { })); + + System.out.println(iteration + " create/seek took: " + (System.nanoTime() - nanos) / 1000); + int cnt = 0; + while (s.next() != null) ++cnt; + + System.out.println(iteration + " took usec: " + (System.nanoTime() - nanos) / 1000 + " for: " + + cnt); + + } + + @Override + public void tearDown() throws Exception { + chunkPool.clearChunks(); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + this.mvcc = new MultiVersionConsistencyControl(); + Configuration conf = new Configuration(); + conf.setBoolean(MemStoreSegment.USEMSLAB_KEY, true); + conf.setFloat(MemStoreChunkPool.CHUNK_POOL_MAXSIZE_KEY, 0.2f); + conf.setInt(HRegion.MEMSTORE_PERIODIC_FLUSH_INTERVAL, 1000); + HBaseTestingUtility hbaseUtility = HBaseTestingUtility.createLocalHTU(conf); + HColumnDescriptor hcd = new HColumnDescriptor(FAMILY); + this.region = hbaseUtility.createTestRegion("foobar", hcd); + this.store = new HStore(region, hcd, conf); + this.cms = new CompactedMemStore(HBaseConfiguration.create(), CellComparator.COMPARATOR, store); + chunkPool = MemStoreChunkPool.getPool(conf); + assertTrue(chunkPool != null); + } + + public void testPutSameKey() { + byte[] bytes = Bytes.toBytes(getName()); + KeyValue kv = new KeyValue(bytes, bytes, bytes, bytes); + this.cms.add(kv); + byte[] other = Bytes.toBytes("somethingelse"); + KeyValue samekey = new KeyValue(bytes, bytes, bytes, other); + this.cms.add(samekey); + Cell found = this.cms.getActive().first(); + assertEquals(1, this.cms.getActive().getCellsCount()); + assertTrue(Bytes.toString(found.getValueArray()), CellUtil.matchingValue(samekey, found)); + } + + /** + * Test memstore snapshot happening while scanning. + * + * @throws IOException + */ + public void testScanAcrossSnapshot() throws IOException { + int rowCount = addRows(this.cms); + List memstorescanners = this.cms.getScanners(0); + Scan scan = new Scan(); + List result = new ArrayList(); + ScanInfo scanInfo = + new ScanInfo(null, 0, 1, HConstants.LATEST_TIMESTAMP, KeepDeletedCells.FALSE, 0, + this.cms.getComparator()); + ScanType scanType = ScanType.USER_SCAN; + StoreScanner s = new StoreScanner(scan, scanInfo, scanType, null, memstorescanners); + int count = 0; + try { + while (s.next(result)) { + LOG.info(result); + count++; + // Row count is same as column count. + assertEquals(rowCount, result.size()); + result.clear(); + } + } finally { + s.close(); + } + assertEquals(rowCount, count); + for (KeyValueScanner scanner : memstorescanners) { + scanner.close(); + } + + memstorescanners = this.cms.getScanners(mvcc.memstoreReadPoint()); + // Now assert can count same number even if a snapshot mid-scan. + s = new StoreScanner(scan, scanInfo, scanType, null, memstorescanners); + count = 0; + try { + while (s.next(result)) { + LOG.info(result); + // Assert the stuff is coming out in right order. + assertTrue(CellUtil.matchingRow(result.get(0), Bytes.toBytes(count))); + count++; + // Row count is same as column count. + assertEquals(rowCount, result.size()); + if (count == 2) { + // the test should be still correct although the compaction is starting in the background + // there should be nothing to compact + this.cms.snapshot(); + LOG.info("Snapshotted"); + } + result.clear(); + } + } finally { + s.close(); + } + + // snapshot immediately starts compaction, but even with the compaction nothing + // should be compacted (unique keys) and the test should still be correct... 
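+ // (in-memory compaction only drops duplicate versions below the read point, so with + // unique keys every cell survives and the scanned row count is unchanged)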
+ assertEquals(rowCount, count); + for (KeyValueScanner scanner : memstorescanners) { + scanner.close(); + } + memstorescanners = this.cms.getScanners(mvcc.memstoreReadPoint()); + // Assert that new values are seen in kvset as we scan. + long ts = System.currentTimeMillis(); + s = new StoreScanner(scan, scanInfo, scanType, null, memstorescanners); + count = 0; + int snapshotIndex = 5; + try { + while (s.next(result)) { + LOG.info(result); + // Assert the stuff is coming out in right order. + assertTrue(CellUtil.matchingRow(result.get(0), Bytes.toBytes(count))); + // Row count is same as column count. + assertEquals("count=" + count + ", result=" + result, rowCount, result.size()); + count++; + if (count == snapshotIndex) { + MemStoreSnapshot snapshot = this.cms.snapshot(); + this.cms.clearSnapshot(snapshot.getId()); + // Added more rows into kvset. But the scanner wont see these rows. + addRows(this.cms, ts); + LOG.info("Snapshotted, cleared it and then added values (which wont be seen)"); + } + result.clear(); + } + } finally { + s.close(); + } + assertEquals(rowCount, count); + } + + /** + * A simple test which verifies the 3 possible states when scanning across snapshot. + * + * @throws IOException + * @throws CloneNotSupportedException + */ + public void testScanAcrossSnapshot2() throws IOException, CloneNotSupportedException { + // we are going to the scanning across snapshot with two kvs + // kv1 should always be returned before kv2 + final byte[] one = Bytes.toBytes(1); + final byte[] two = Bytes.toBytes(2); + final byte[] f = Bytes.toBytes("f"); + final byte[] q = Bytes.toBytes("q"); + final byte[] v = Bytes.toBytes(3); + + final KeyValue kv1 = new KeyValue(one, f, q, 10, v); + final KeyValue kv2 = new KeyValue(two, f, q, 10, v); + + // use case 1: both kvs in kvset + this.cms.add(kv1.clone()); + this.cms.add(kv2.clone()); + verifyScanAcrossSnapshot2(kv1, kv2); + + // use case 2: both kvs in snapshot + this.cms.snapshot(); + verifyScanAcrossSnapshot2(kv1, kv2); + + // use case 3: first in snapshot second in kvset + this.cms = new CompactedMemStore(HBaseConfiguration.create(), + CellComparator.COMPARATOR, store); + this.cms.add(kv1.clone()); + this.cms + .snapshot(); // As compaction is starting in the background the repetition + this.cms.add( + kv2.clone()); // of the k1 might be removed BUT the scanners created earlier + verifyScanAcrossSnapshot2(kv1, + kv2); // should look on the OLD MemStoreSegment, so this should be OK... 
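+ // Note: a memstore scanner captures the set of segments (active, pipeline, snapshot) + // that exists when it is created, so a background compaction that later rewrites the + // pipeline must not change the cells an already-open scanner returns.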
+ } + + private void verifyScanAcrossSnapshot2(KeyValue kv1, KeyValue kv2) + throws IOException { + List memstorescanners = this.cms.getScanners(mvcc.memstoreReadPoint()); + assertEquals(1, memstorescanners.size()); + final KeyValueScanner scanner = memstorescanners.get(0); + scanner.seek(KeyValueUtil.createFirstOnRow(HConstants.EMPTY_START_ROW)); + assertEquals(kv1, scanner.next()); + assertEquals(kv2, scanner.next()); + assertNull(scanner.next()); + } + + private void assertScannerResults(KeyValueScanner scanner, KeyValue[] expected) + throws IOException { + scanner.seek(KeyValueUtil.createFirstOnRow(new byte[] { })); + List returned = Lists.newArrayList(); + + while (true) { + Cell next = scanner.next(); + if (next == null) break; + returned.add(next); + } + + assertTrue( + "Got:\n" + Joiner.on("\n").join(returned) + + "\nExpected:\n" + Joiner.on("\n").join(expected), + Iterables.elementsEqual(Arrays.asList(expected), returned)); + assertNull(scanner.peek()); + } + + public void testMemstoreConcurrentControl() throws IOException { + final byte[] row = Bytes.toBytes(1); + final byte[] f = Bytes.toBytes("family"); + final byte[] q1 = Bytes.toBytes("q1"); + final byte[] q2 = Bytes.toBytes("q2"); + final byte[] v = Bytes.toBytes("value"); + + MultiVersionConsistencyControl.WriteEntry w = + mvcc.beginMemstoreInsertWithSeqNum(this.startSeqNum.incrementAndGet()); + + KeyValue kv1 = new KeyValue(row, f, q1, v); + kv1.setSequenceId(w.getWriteNumber()); + cms.add(kv1); + + KeyValueScanner s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { }); + + mvcc.completeMemstoreInsert(w); + + s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv1 }); + + w = mvcc.beginMemstoreInsertWithSeqNum(this.startSeqNum.incrementAndGet()); + KeyValue kv2 = new KeyValue(row, f, q2, v); + kv2.setSequenceId(w.getWriteNumber()); + cms.add(kv2); + + s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv1 }); + + mvcc.completeMemstoreInsert(w); + + s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv1, kv2 }); + } + + /** + * Regression test for HBASE-2616, HBASE-2670. + * When we insert a higher-memstoreTS version of a cell but with + * the same timestamp, we still need to provide consistent reads + * for the same scanner. 
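+ * With MVCC the later insert carries a higher sequence id even though the + * timestamp is identical, so it sorts ahead of the older cell; scanners opened + * below the new read point must keep returning only the older values.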
+ */ + public void testMemstoreEditsVisibilityWithSameKey() throws IOException { + final byte[] row = Bytes.toBytes(1); + final byte[] f = Bytes.toBytes("family"); + final byte[] q1 = Bytes.toBytes("q1"); + final byte[] q2 = Bytes.toBytes("q2"); + final byte[] v1 = Bytes.toBytes("value1"); + final byte[] v2 = Bytes.toBytes("value2"); + + // INSERT 1: Write both columns val1 + MultiVersionConsistencyControl.WriteEntry w = + mvcc.beginMemstoreInsertWithSeqNum(this.startSeqNum.incrementAndGet()); + + KeyValue kv11 = new KeyValue(row, f, q1, v1); + kv11.setSequenceId(w.getWriteNumber()); + cms.add(kv11); + + KeyValue kv12 = new KeyValue(row, f, q2, v1); + kv12.setSequenceId(w.getWriteNumber()); + cms.add(kv12); + mvcc.completeMemstoreInsert(w); + + // BEFORE STARTING INSERT 2, SEE FIRST KVS + KeyValueScanner s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kv12 }); + + // START INSERT 2: Write both columns val2 + w = mvcc.beginMemstoreInsertWithSeqNum(this.startSeqNum.incrementAndGet()); + KeyValue kv21 = new KeyValue(row, f, q1, v2); + kv21.setSequenceId(w.getWriteNumber()); + cms.add(kv21); + + KeyValue kv22 = new KeyValue(row, f, q2, v2); + kv22.setSequenceId(w.getWriteNumber()); + cms.add(kv22); + + // BEFORE COMPLETING INSERT 2, SEE FIRST KVS + s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kv12 }); + + // COMPLETE INSERT 2 + mvcc.completeMemstoreInsert(w); + + // NOW SHOULD SEE NEW KVS IN ADDITION TO OLD KVS. + // See HBASE-1485 for discussion about what we should do with + // the duplicate-TS inserts + s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv21, kv11, kv22, kv12 }); + } + + /** + * When we insert a higher-memstoreTS deletion of a cell but with + * the same timestamp, we still need to provide consistent reads + * for the same scanner. 
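+ * The delete marker is retained in the memstore next to the puts it masks; a + * scanner whose read point predates the marker's sequence id must not see it.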
+ */ + public void testMemstoreDeletesVisibilityWithSameKey() throws IOException { + final byte[] row = Bytes.toBytes(1); + final byte[] f = Bytes.toBytes("family"); + final byte[] q1 = Bytes.toBytes("q1"); + final byte[] q2 = Bytes.toBytes("q2"); + final byte[] v1 = Bytes.toBytes("value1"); + // INSERT 1: Write both columns val1 + MultiVersionConsistencyControl.WriteEntry w = + mvcc.beginMemstoreInsertWithSeqNum(this.startSeqNum.incrementAndGet()); + + KeyValue kv11 = new KeyValue(row, f, q1, v1); + kv11.setSequenceId(w.getWriteNumber()); + cms.add(kv11); + + KeyValue kv12 = new KeyValue(row, f, q2, v1); + kv12.setSequenceId(w.getWriteNumber()); + cms.add(kv12); + mvcc.completeMemstoreInsert(w); + + // BEFORE STARTING INSERT 2, SEE FIRST KVS + KeyValueScanner s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kv12 }); + + // START DELETE: Insert delete for one of the columns + w = mvcc.beginMemstoreInsertWithSeqNum(this.startSeqNum.incrementAndGet()); + KeyValue kvDel = new KeyValue(row, f, q2, kv11.getTimestamp(), + KeyValue.Type.DeleteColumn); + kvDel.setSequenceId(w.getWriteNumber()); + cms.add(kvDel); + + // BEFORE COMPLETING DELETE, SEE FIRST KVS + s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kv12 }); + + // COMPLETE DELETE + mvcc.completeMemstoreInsert(w); + + // NOW WE SHOULD SEE DELETE + s = this.cms.getScanners(mvcc.memstoreReadPoint()).get(0); + assertScannerResults(s, new KeyValue[] { kv11, kvDel, kv12 }); + } + + public void testReadOwnWritesUnderConcurrency() throws Throwable { + + int NUM_THREADS = 8; + + ReadOwnWritesTester threads[] = new ReadOwnWritesTester[NUM_THREADS]; + AtomicReference caught = new AtomicReference(); + + for (int i = 0; i < NUM_THREADS; i++) { + threads[i] = new ReadOwnWritesTester(i, cms, mvcc, caught, this.startSeqNum); + threads[i].start(); + } + + for (int i = 0; i < NUM_THREADS; i++) { + threads[i].join(); + } + + if (caught.get() != null) { + throw caught.get(); + } + } + + ////////////////////////////////////////////////////////////////////////////// + // Get tests + ////////////////////////////////////////////////////////////////////////////// + + /** + * Test memstore snapshots + * + * @throws IOException + */ + public void testSnapshotting() throws IOException { + final int snapshotCount = 5; + // Add some rows, run a snapshot. Do it a few times.
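+ // For CompactedMemStore a plain snapshot() merely pushes the active segment into the + // compaction pipeline and leaves the snapshot segment empty; runSnapshot(..., true) + // calls setForceFlush() first so that snapshot() yields a real, clearable snapshot.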
+ for (int i = 0; i < snapshotCount; i++) { + addRows(this.cms); + runSnapshot(this.cms, true); + assertEquals("History not being cleared", 0, this.cms.getSnapshot().getCellsCount()); + } + } + + public void testMultipleVersionsSimple() throws Exception { + byte[] row = Bytes.toBytes("testRow"); + byte[] family = Bytes.toBytes("testFamily"); + byte[] qf = Bytes.toBytes("testQualifier"); + long[] stamps = { 1, 2, 3 }; + byte[][] values = { Bytes.toBytes("value0"), Bytes.toBytes("value1"), + Bytes.toBytes("value2") }; + KeyValue key0 = new KeyValue(row, family, qf, stamps[0], values[0]); + KeyValue key1 = new KeyValue(row, family, qf, stamps[1], values[1]); + KeyValue key2 = new KeyValue(row, family, qf, stamps[2], values[2]); + + cms.add(key0); + cms.add(key1); + cms.add(key2); + + assertTrue("Expected memstore to hold 3 values, actually has " + + cms.getActive().getCellsCount(), cms.getActive().getCellsCount() == 3); + } + + /** + * Test getNextRow from memstore + * + * @throws InterruptedException + */ + public void testGetNextRow() throws Exception { + addRows(this.cms); + // Add more versions to make it a little more interesting. + Thread.sleep(1); + addRows(this.cms); + Cell closestToEmpty = this.cms.getNextRow(KeyValue.LOWESTKEY); + assertTrue(KeyValue.COMPARATOR.compareRows(closestToEmpty, + new KeyValue(Bytes.toBytes(0), System.currentTimeMillis())) == 0); + for (int i = 0; i < ROW_COUNT; i++) { + Cell nr = this.cms.getNextRow(new KeyValue(Bytes.toBytes(i), + System.currentTimeMillis())); + if (i + 1 == ROW_COUNT) { + assertEquals(nr, null); + } else { + assertTrue(KeyValue.COMPARATOR.compareRows(nr, + new KeyValue(Bytes.toBytes(i + 1), System.currentTimeMillis())) == 0); + } + } + //starting from each row, validate results should contain the starting row + for (int startRowId = 0; startRowId < ROW_COUNT; startRowId++) { + ScanInfo scanInfo = new ScanInfo(FAMILY, 0, 1, Integer.MAX_VALUE, KeepDeletedCells.FALSE, + 0, this.cms.getComparator()); + ScanType scanType = ScanType.USER_SCAN; + InternalScanner scanner = new StoreScanner(new Scan( + Bytes.toBytes(startRowId)), scanInfo, scanType, null, + cms.getScanners(0)); + List results = new ArrayList(); + for (int i = 0; scanner.next(results); i++) { + int rowId = startRowId + i; + Cell left = results.get(0); + byte[] row1 = Bytes.toBytes(rowId); + assertTrue("Row name", + KeyValue.COMPARATOR.compareRows(left.getRowArray(), left.getRowOffset(), + (int) left.getRowLength(), row1, 0, row1.length) == 0); + assertEquals("Count of columns", QUALIFIER_COUNT, results.size()); + List row = new ArrayList(); + for (Cell kv : results) { + row.add(kv); + } + isExpectedRowWithoutTimestamps(rowId, row); + // Clear out set. Otherwise row results accumulate. 
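+ // (InternalScanner.next(List) appends to the list it is given rather than + // clearing it first, hence the explicit clear() on every iteration.)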
+ results.clear(); + } + } + } + + public void testGet_memstoreAndSnapShot() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] qf4 = Bytes.toBytes("testqualifier4"); + byte[] qf5 = Bytes.toBytes("testqualifier5"); + byte[] val = Bytes.toBytes("testval"); + + //Setting up memstore + cms.add(new KeyValue(row, fam, qf1, val)); + cms.add(new KeyValue(row, fam, qf2, val)); + cms.add(new KeyValue(row, fam, qf3, val)); + //Creating a snapshot + cms.snapshot(); + assertEquals(0, cms.getSnapshot().getCellsCount()); + cms.setForceFlush().snapshot(); + assertEquals(3, cms.getSnapshot().getCellsCount()); + //Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf4, val)); + cms.add(new KeyValue(row, fam, qf5, val)); + assertEquals(2, cms.getActive().getCellsCount()); + } + + ////////////////////////////////////////////////////////////////////////////// + // Delete tests + ////////////////////////////////////////////////////////////////////////////// + public void testGetWithDelete() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier"); + byte[] val = Bytes.toBytes("testval"); + + long ts1 = System.nanoTime(); + KeyValue put1 = new KeyValue(row, fam, qf1, ts1, val); + long ts2 = ts1 + 1; + KeyValue put2 = new KeyValue(row, fam, qf1, ts2, val); + long ts3 = ts2 + 1; + KeyValue put3 = new KeyValue(row, fam, qf1, ts3, val); + cms.add(put1); + cms.add(put2); + cms.add(put3); + + assertEquals(3, cms.getActive().getCellsCount()); + + KeyValue del2 = new KeyValue(row, fam, qf1, ts2, KeyValue.Type.Delete, val); + cms.delete(del2); + + List expected = new ArrayList(); + expected.add(put3); + expected.add(del2); + expected.add(put2); + expected.add(put1); + + assertEquals(4, cms.getActive().getCellsCount()); + int i = 0; + for (Cell cell : cms.getActive().getCellSet()) { + assertEquals(expected.get(i++), cell); + } + } + + public void testGetWithDeleteColumn() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier"); + byte[] val = Bytes.toBytes("testval"); + + long ts1 = System.nanoTime(); + KeyValue put1 = new KeyValue(row, fam, qf1, ts1, val); + long ts2 = ts1 + 1; + KeyValue put2 = new KeyValue(row, fam, qf1, ts2, val); + long ts3 = ts2 + 1; + KeyValue put3 = new KeyValue(row, fam, qf1, ts3, val); + cms.add(put1); + cms.add(put2); + cms.add(put3); + + assertEquals(3, cms.getActive().getCellsCount()); + + KeyValue del2 = + new KeyValue(row, fam, qf1, ts2, KeyValue.Type.DeleteColumn, val); + cms.delete(del2); + + List expected = new ArrayList(); + expected.add(put3); + expected.add(del2); + expected.add(put2); + expected.add(put1); + + assertEquals(4, cms.getActive().getCellsCount()); + int i = 0; + for (Cell cell : cms.getActive().getCellSet()) { + assertEquals(expected.get(i++), cell); + } + } + + public void testGetWithDeleteFamily() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] val = Bytes.toBytes("testval"); + long ts = System.nanoTime(); 
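+ // A DeleteFamily marker carries an empty qualifier, so it sorts ahead of every put + // in the row; the expected ordering asserted below relies on that, and on put4 + // (newer timestamp) sorting before put3 within the same qualifier.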
+ + KeyValue put1 = new KeyValue(row, fam, qf1, ts, val); + KeyValue put2 = new KeyValue(row, fam, qf2, ts, val); + KeyValue put3 = new KeyValue(row, fam, qf3, ts, val); + KeyValue put4 = new KeyValue(row, fam, qf3, ts + 1, val); + + cms.add(put1); + cms.add(put2); + cms.add(put3); + cms.add(put4); + + KeyValue del = + new KeyValue(row, fam, null, ts, KeyValue.Type.DeleteFamily, val); + cms.delete(del); + + List expected = new ArrayList(); + expected.add(del); + expected.add(put1); + expected.add(put2); + expected.add(put4); + expected.add(put3); + + assertEquals(5, cms.getActive().getCellsCount()); + int i = 0; + for (Cell cell : cms.getActive().getCellSet()) { + assertEquals(expected.get(i++), cell); + } + } + + public void testKeepDeleteInmemstore() { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf = Bytes.toBytes("testqualifier"); + byte[] val = Bytes.toBytes("testval"); + long ts = System.nanoTime(); + cms.add(new KeyValue(row, fam, qf, ts, val)); + KeyValue delete = new KeyValue(row, fam, qf, ts, KeyValue.Type.Delete, val); + cms.delete(delete); + assertEquals(2, cms.getActive().getCellsCount()); + assertEquals(delete, cms.getActive().first()); + } + + public void testRetainsDeleteVersion() throws IOException { + // add a put to memstore + cms.add(KeyValueTestUtil.create("row1", "fam", "a", 100, "dont-care")); + + // now process a specific delete: + KeyValue delete = KeyValueTestUtil.create( + "row1", "fam", "a", 100, KeyValue.Type.Delete, "dont-care"); + cms.delete(delete); + + assertEquals(2, cms.getActive().getCellsCount()); + assertEquals(delete, cms.getActive().first()); + } + + //////////////////////////////////// + //Test for timestamps + //////////////////////////////////// + + public void testRetainsDeleteColumn() throws IOException { + // add a put to memstore + cms.add(KeyValueTestUtil.create("row1", "fam", "a", 100, "dont-care")); + + // now process a specific delete: + KeyValue delete = KeyValueTestUtil.create("row1", "fam", "a", 100, + KeyValue.Type.DeleteColumn, "dont-care"); + cms.delete(delete); + + assertEquals(2, cms.getActive().getCellsCount()); + assertEquals(delete, cms.getActive().first()); + } + + //////////////////////////////////// + //Test for upsert with MSLAB + //////////////////////////////////// + + public void testRetainsDeleteFamily() throws IOException { + // add a put to memstore + cms.add(KeyValueTestUtil.create("row1", "fam", "a", 100, "dont-care")); + + // now process a specific delete: + KeyValue delete = KeyValueTestUtil.create("row1", "fam", "a", 100, + KeyValue.Type.DeleteFamily, "dont-care"); + cms.delete(delete); + + assertEquals(2, cms.getActive().getCellsCount()); + assertEquals(delete, cms.getActive().first()); + } + + /** + * Test to ensure correctness when using Memstore with multiple timestamps + */ + public void testMultipleTimestamps() throws IOException { + long[] timestamps = new long[] { 20, 10, 5, 1 }; + Scan scan = new Scan(); + + for (long timestamp : timestamps) + addRows(cms, timestamp); + + scan.setTimeRange(0, 2); + assertTrue(cms.shouldSeek(scan, Long.MIN_VALUE)); + + scan.setTimeRange(20, 82); + assertTrue(cms.shouldSeek(scan, Long.MIN_VALUE)); + + scan.setTimeRange(10, 20); + assertTrue(cms.shouldSeek(scan, Long.MIN_VALUE)); + + scan.setTimeRange(8, 12); + assertTrue(cms.shouldSeek(scan, Long.MIN_VALUE)); + + /*This test is not required for correctness but it should pass when + * timestamp range optimization is on*/ + 
//scan.setTimeRange(28, 42); + //assertTrue(!memstore.shouldSeek(scan)); + } + + /** + * Test a pathological pattern that shows why we can't currently + * use the MSLAB for upsert workloads. This test inserts data + * in the following pattern: + * - row0001 through row1000 (fills up one 2M Chunk) + * - row0002 through row1001 (fills up another 2M chunk, leaves one reference + * to the first chunk + * - row0003 through row1002 (another chunk, another dangling reference) + * This causes OOME pretty quickly if we use MSLAB for upsert + * since each 2M chunk is held onto by a single reference. + */ + public void testUpsertMSLAB() throws Exception { + + int ROW_SIZE = 2048; + byte[] qualifier = new byte[ROW_SIZE - 4]; + + MemoryMXBean bean = ManagementFactory.getMemoryMXBean(); + for (int i = 0; i < 3; i++) { + System.gc(); + } + long usageBefore = bean.getHeapMemoryUsage().getUsed(); + + long size = 0; + long ts = 0; + + for (int newValue = 0; newValue < 1000; newValue++) { + for (int row = newValue; row < newValue + 1000; row++) { + byte[] rowBytes = Bytes.toBytes(row); + size += cms.updateColumnValue(rowBytes, FAMILY, qualifier, newValue, ++ts); + } + } + System.out.println("Wrote " + ts + " vals"); + for (int i = 0; i < 3; i++) { + System.gc(); + } + long usageAfter = bean.getHeapMemoryUsage().getUsed(); + System.out.println("Memory used: " + (usageAfter - usageBefore) + + " (heapsize: " + cms.heapSize() + + " size: " + size + ")"); + } + + //////////////////////////////////// + // Test for periodic memstore flushes + // based on time of oldest edit + //////////////////////////////////// + + /** + * Add keyvalues with a fixed memstoreTs, and checks that memstore size is decreased + * as older keyvalues are deleted from the memstore. + * + * @throws Exception + */ + public void testUpsertMemstoreSize() throws Exception { + long oldSize = cms.size(); + + List l = new ArrayList(); + KeyValue kv1 = KeyValueTestUtil.create("r", "f", "q", 100, "v"); + KeyValue kv2 = KeyValueTestUtil.create("r", "f", "q", 101, "v"); + KeyValue kv3 = KeyValueTestUtil.create("r", "f", "q", 102, "v"); + + kv1.setSequenceId(1); + kv2.setSequenceId(1); + kv3.setSequenceId(1); + l.add(kv1); + l.add(kv2); + l.add(kv3); + + this.cms.upsert(l, 2);// readpoint is 2 + long newSize = this.cms.size(); + assert (newSize > oldSize); + //The kv1 should be removed. + assert (cms.getActive().getCellsCount() == 2); + + KeyValue kv4 = KeyValueTestUtil.create("r", "f", "q", 104, "v"); + kv4.setSequenceId(1); + l.clear(); + l.add(kv4); + this.cms.upsert(l, 3); + assertEquals(newSize, this.cms.size()); + //The kv2 should be removed. + assert (cms.getActive().getCellsCount() == 2); + //this.memstore = null; + } + + /** + * Tests that the timeOfOldestEdit is updated correctly for the + * various edit operations in memstore. + * + * @throws Exception + */ + public void testUpdateToTimeOfOldestEdit() throws Exception { + try { + EnvironmentEdgeForMemstoreTest edge = new EnvironmentEdgeForMemstoreTest(); + EnvironmentEdgeManager.injectEdge(edge); + long t = cms.timeOfOldestEdit(); + assertEquals(t, Long.MAX_VALUE); + + // test the case that the timeOfOldestEdit is updated after a KV add + cms.add(KeyValueTestUtil.create("r", "f", "q", 100, "v")); + t = cms.timeOfOldestEdit(); + assertTrue(t == 1234); + // snapshot() after setForceFlush() will reset timeOfOldestEdit. 
The method will also assert + // the value is reset to Long.MAX_VALUE + + // t = runSnapshot(compacmemstore, false); + t = runSnapshot(cms, true); + + // test the case that the timeOfOldestEdit is updated after a KV delete + cms.delete(KeyValueTestUtil.create("r", "f", "q", 100, "v")); + t = cms.timeOfOldestEdit(); + assertTrue(t == 1234); + + t = runSnapshot(cms, true); + + // test the case that the timeOfOldestEdit is updated after a KV upsert + List l = new ArrayList(); + KeyValue kv1 = KeyValueTestUtil.create("r", "f", "q", 100, "v"); + kv1.setSequenceId(100); + l.add(kv1); + cms.upsert(l, 1000); + t = cms.timeOfOldestEdit(); + assertTrue(t == 1234); + } finally { + EnvironmentEdgeManager.reset(); + } + } + + /** + * Tests the HRegion.shouldFlush method - adds an edit in the memstore + * and checks that shouldFlush returns true, and another where it disables + * the periodic flush functionality and tests whether shouldFlush returns + * false. + * + * @throws Exception + */ + public void testShouldFlush() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(HRegion.MEMSTORE_PERIODIC_FLUSH_INTERVAL, 1000); + checkShouldFlush(conf, true); + // test disable flush + conf.setInt(HRegion.MEMSTORE_PERIODIC_FLUSH_INTERVAL, 0); + checkShouldFlush(conf, false); + } + + private void checkShouldFlush(Configuration conf, boolean expected) throws Exception { + try { + EnvironmentEdgeForMemstoreTest edge = new EnvironmentEdgeForMemstoreTest(); + EnvironmentEdgeManager.injectEdge(edge); + HBaseTestingUtility hbaseUtility = HBaseTestingUtility.createLocalHTU(conf); + HRegion region = hbaseUtility.createTestRegion("foobar", new HColumnDescriptor("foo")); + + List stores = region.getStores(); + assertTrue(stores.size() == 1); + + Store s = stores.iterator().next(); + edge.setCurrentTimeMillis(1234); + s.add(KeyValueTestUtil.create("r", "f", "q", 100, "v")); + edge.setCurrentTimeMillis(1234 + 100); + StringBuffer sb = new StringBuffer(); + assertTrue(!region.shouldFlush(sb)); + edge.setCurrentTimeMillis(1234 + 10000); + assertTrue(region.shouldFlush(sb) == expected); + } finally { + EnvironmentEdgeManager.reset(); + } + } + + /** + * Adds {@link #ROW_COUNT} rows and {@link #QUALIFIER_COUNT} + * + * @param hmc Instance to add rows to. + * @return How many rows we added. + * @throws IOException + */ + private int addRows(final AbstractMemStore hmc) { + return addRows(hmc, HConstants.LATEST_TIMESTAMP); + } + + /** + * Adds {@link #ROW_COUNT} rows and {@link #QUALIFIER_COUNT} + * + * @param hmc Instance to add rows to. + * @return How many rows we added. + * @throws IOException + */ + private int addRows(final AbstractMemStore hmc, final long ts) { + for (int i = 0; i < ROW_COUNT; i++) { + long timestamp = ts == HConstants.LATEST_TIMESTAMP ? + System.currentTimeMillis() : ts; + for (int ii = 0; ii < QUALIFIER_COUNT; ii++) { + byte[] row = Bytes.toBytes(i); + byte[] qf = makeQualifier(i, ii); + hmc.add(new KeyValue(row, FAMILY, qf, timestamp, qf)); + } + } + return ROW_COUNT; + } + + private long runSnapshot(final CompactedMemStore hmc, boolean useForce) + throws IOException { + // Save off old state. + long oldHistorySize = hmc.getSnapshot().getSize(); + long prevTimeStamp = hmc.timeOfOldestEdit(); + if (useForce) hmc.setForceFlush(); + hmc.snapshot(); + MemStoreSnapshot snapshot = hmc.snapshot(); + if (useForce) { + // Make some assertions about what just happened. 
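+ // A forced snapshot moves the pipeline contents into the snapshot segment and + // resets timeOfOldestEdit to Long.MAX_VALUE; a non-forced snapshot only feeds the + // compaction pipeline, so the time of the oldest edit must survive it.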
+ assertTrue("History size has not increased", oldHistorySize < snapshot.getSize()); + long t = hmc.timeOfOldestEdit(); + assertTrue("Time of oldest edit is not Long.MAX_VALUE", t == Long.MAX_VALUE); + hmc.clearSnapshot(snapshot.getId()); + } else { + long t = hmc.timeOfOldestEdit(); + assertTrue("Time of oldest edit didn't remain the same", t == prevTimeStamp); + } + return prevTimeStamp; + } + + private void isExpectedRowWithoutTimestamps(final int rowIndex, + List kvs) { + int i = 0; + for (Cell kv : kvs) { + byte[] expectedColname = makeQualifier(rowIndex, i++); + assertTrue("Column name", CellUtil.matchingQualifier(kv, expectedColname)); + // Value is column name as bytes. Usually result is + // 100 bytes in size at least. This is the default size + // for BytesWriteable. For comparison, convert bytes to + // String and trim to remove trailing null bytes. + assertTrue("Content", CellUtil.matchingValue(kv, expectedColname)); + } + } + + @Test + public void testPuttingBackChunksAfterFlushing() throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] qf4 = Bytes.toBytes("testqualifier4"); + byte[] qf5 = Bytes.toBytes("testqualifier5"); + byte[] val = Bytes.toBytes("testval"); + + // Setting up memstore + cms.add(new KeyValue(row, fam, qf1, val)); + cms.add(new KeyValue(row, fam, qf2, val)); + cms.add(new KeyValue(row, fam, qf3, val)); + + // Creating a snapshot + cms.setForceFlush(); + MemStoreSnapshot snapshot = cms.snapshot(); + assertEquals(3, cms.getSnapshot().getCellsCount()); + + // Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf4, val)); + cms.add(new KeyValue(row, fam, qf5, val)); + assertEquals(2, cms.getActive().getCellsCount()); + cms.clearSnapshot(snapshot.getId()); + + int chunkCount = chunkPool.getPoolSize(); + assertTrue(chunkCount > 0); + + } + + @Test + public void testPuttingBackChunksWithOpeningScanner() + throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] qf4 = Bytes.toBytes("testqualifier4"); + byte[] qf5 = Bytes.toBytes("testqualifier5"); + byte[] qf6 = Bytes.toBytes("testqualifier6"); + byte[] qf7 = Bytes.toBytes("testqualifier7"); + byte[] val = Bytes.toBytes("testval"); + + // Setting up memstore + cms.add(new KeyValue(row, fam, qf1, val)); + cms.add(new KeyValue(row, fam, qf2, val)); + cms.add(new KeyValue(row, fam, qf3, val)); + + // Creating a snapshot + cms.setForceFlush(); + MemStoreSnapshot snapshot = cms.snapshot(); + assertEquals(3, cms.getSnapshot().getCellsCount()); + + // Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf4, val)); + cms.add(new KeyValue(row, fam, qf5, val)); + assertEquals(2, cms.getActive().getCellsCount()); + + // opening scanner before clear the snapshot + List scanners = cms.getScanners(0); + // Shouldn't putting back the chunks to pool,since some scanners are opening + // based on their data + cms.clearSnapshot(snapshot.getId()); + + assertTrue(chunkPool.getPoolSize() == 0); + + // Chunks will be put back to pool after close scanners; + for (KeyValueScanner scanner : scanners) { + 
scanner.close(); + } + assertTrue(chunkPool.getPoolSize() > 0); + + // clear chunks + chunkPool.clearChunks(); + + // Creating another snapshot + cms.setForceFlush(); + snapshot = cms.snapshot(); + // Adding more value + cms.add(new KeyValue(row, fam, qf6, val)); + cms.add(new KeyValue(row, fam, qf7, val)); + // opening scanners + scanners = cms.getScanners(0); + // close scanners before clear the snapshot + for (KeyValueScanner scanner : scanners) { + scanner.close(); + } + // Since no opening scanner, the chunks of snapshot should be put back to + // pool + cms.clearSnapshot(snapshot.getId()); + assertTrue(chunkPool.getPoolSize() > 0); + } + + @Test + public void testPuttingBackChunksWithOpeningPipelineScanner() + throws IOException { + byte[] row = Bytes.toBytes("testrow"); + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf1 = Bytes.toBytes("testqualifier1"); + byte[] qf2 = Bytes.toBytes("testqualifier2"); + byte[] qf3 = Bytes.toBytes("testqualifier3"); + byte[] val = Bytes.toBytes("testval"); + + // Setting up memstore + cms.add(new KeyValue(row, fam, qf1, 1, val)); + cms.add(new KeyValue(row, fam, qf2, 1, val)); + cms.add(new KeyValue(row, fam, qf3, 1, val)); + + // Creating a pipeline + cms.disableCompaction(); + cms.snapshot(); + + // Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf1, 2, val)); + cms.add(new KeyValue(row, fam, qf2, 2, val)); + assertEquals(2, cms.getActive().getCellsCount()); + + // pipeline bucket 2 + cms.snapshot(); + // opening scanner before force flushing + List scanners = cms.getScanners(0); + // Shouldn't putting back the chunks to pool,since some scanners are opening + // based on their data + cms.enableCompaction(); + // trigger compaction + cms.snapshot(); + + // Adding value to "new" memstore + assertEquals(0, cms.getActive().getCellsCount()); + cms.add(new KeyValue(row, fam, qf3, 3, val)); + cms.add(new KeyValue(row, fam, qf2, 3, val)); + cms.add(new KeyValue(row, fam, qf1, 3, val)); + assertEquals(3, cms.getActive().getCellsCount()); + + while (cms.isMemstoreCompaction()) { + Threads.sleep(10); + } + + assertTrue(chunkPool.getPoolSize() == 0); + + // Chunks will be put back to pool after close scanners; + for (KeyValueScanner scanner : scanners) { + scanner.close(); + } + assertTrue(chunkPool.getPoolSize() > 0); + + // clear chunks + chunkPool.clearChunks(); + + // Creating another snapshot + cms.setForceFlush(); + MemStoreSnapshot snapshot = cms.snapshot(); + cms.clearSnapshot(snapshot.getId()); + cms.setForceFlush(); + snapshot = cms.snapshot(); + // Adding more value + cms.add(new KeyValue(row, fam, qf2, 4, val)); + cms.add(new KeyValue(row, fam, qf3, 4, val)); + // opening scanners + scanners = cms.getScanners(0); + // close scanners before clear the snapshot + for (KeyValueScanner scanner : scanners) { + scanner.close(); + } + // Since no opening scanner, the chunks of snapshot should be put back to + // pool + cms.clearSnapshot(snapshot.getId()); + assertTrue(chunkPool.getPoolSize() > 0); + } + + ////////////////////////////////////////////////////////////////////////////// + // Compaction tests + ////////////////////////////////////////////////////////////////////////////// + public void testCompaction1Bucket() throws IOException { + + String[] keys1 = { "A", "A", "B", "C" }; //A1, A2, B3, C4 + + // test 1 bucket + addRowsByKeys(cms, keys1); + assertEquals(704, region.getMemstoreTotalSize()); + + long size = cms.getFlushableSize(); + cms.snapshot(); // push keys to 
pipeline and compact + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemstoreCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(528, region.getMemstoreTotalSize()); + + cms.setForceFlush(); + size = cms.getFlushableSize(); + MemStoreSnapshot snapshot = cms.snapshot(); // push keys to snapshot + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher + MemStoreSegment s = cms.getSnapshot(); + SortedSet ss = s.getCellSet(); + assertEquals(3, s.getCellsCount()); + assertEquals(0, region.getMemstoreTotalSize()); + + cms.clearSnapshot(snapshot.getId()); + } + + public void testCompaction2Buckets() throws IOException { + + String[] keys1 = { "A", "A", "B", "C" }; + String[] keys2 = { "A", "B", "D" }; + + addRowsByKeys(cms, keys1); + assertEquals(704, region.getMemstoreTotalSize()); + + long size = cms.getFlushableSize(); + cms.snapshot(); // push keys to pipeline and compact + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemstoreCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(528, region.getMemstoreTotalSize()); + + addRowsByKeys(cms, keys2); + assertEquals(1056, region.getMemstoreTotalSize()); + + size = cms.getFlushableSize(); + cms.snapshot(); // push keys to pipeline and compact + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemstoreCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(704, region.getMemstoreTotalSize()); + + cms.setForceFlush(); + size = cms.getFlushableSize(); + MemStoreSnapshot snapshot = cms.snapshot(); // push keys to snapshot + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher + MemStoreSegment s = cms.getSnapshot(); + SortedSet ss = s.getCellSet(); + assertEquals(4, s.getCellsCount()); + assertEquals(0, region.getMemstoreTotalSize()); + + cms.clearSnapshot(snapshot.getId()); + } + + public void testCompaction3Buckets() throws IOException { + + String[] keys1 = { "A", "A", "B", "C" }; + String[] keys2 = { "A", "B", "D" }; + String[] keys3 = { "D", "B", "B" }; + + addRowsByKeys(cms, keys1); + assertEquals(704, region.getMemstoreSize()); + + long size = cms.getFlushableSize(); + cms.snapshot(); // push keys to pipeline and compact + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemstoreCompaction()) { + Threads.sleep(10); + } + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(0, region.getMemstoreSize()); + assertEquals(528, region.getMemstoreTotalSize()); + + addRowsByKeys(cms, keys2); + assertEquals(528, region.getMemstoreSize()); + assertEquals(1056, region.getMemstoreTotalSize()); + + cms.disableCompaction(); + size = cms.getFlushableSize(); + cms.snapshot(); // push keys to pipeline without compaction + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(0, region.getMemstoreSize()); + assertEquals(1056, region.getMemstoreTotalSize()); + + addRowsByKeys(cms, keys3); + assertEquals(528, region.getMemstoreSize()); + assertEquals(1584, region.getMemstoreTotalSize()); + + cms.enableCompaction(); + size = cms.getFlushableSize(); + cms.snapshot(); // push keys to pipeline and compact + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher thread + while (cms.isMemstoreCompaction()) { + Threads.sleep(10); + } + 
assertEquals(0, cms.getSnapshot().getCellsCount()); + assertEquals(0, region.getMemstoreSize()); + assertEquals(704, region.getMemstoreTotalSize()); + + cms.setForceFlush(); + size = cms.getFlushableSize(); + MemStoreSnapshot snapshot = cms.snapshot(); // push keys to snapshot + region.addAndGetGlobalMemstoreSize(-size); // simulate flusher + MemStoreSegment s = cms.getSnapshot(); + SortedSet ss = s.getCellSet(); + assertEquals(4, s.getCellsCount()); + assertEquals(0, region.getMemstoreSize()); + assertEquals(0, region.getMemstoreTotalSize()); + + cms.clearSnapshot(snapshot.getId()); + } + + private void addRowsByKeys(final AbstractMemStore hmc, String[] keys) { + byte[] fam = Bytes.toBytes("testfamily"); + byte[] qf = Bytes.toBytes("testqualifier"); + for (int i = 0; i < keys.length; i++) { + long timestamp = System.currentTimeMillis(); + Threads.sleep(1); // to make sure each kv gets a different ts + byte[] row = Bytes.toBytes(keys[i]); + byte[] val = Bytes.toBytes(keys[i] + i); + KeyValue kv = new KeyValue(row, fam, qf, timestamp, val); + hmc.add(kv); + LOG.debug("added kv: " + kv.getKeyString() + ", timestamp" + kv.getTimestamp()); + long size = AbstractMemStore.heapSizeChange(kv, true); + region.addAndGetGlobalMemstoreSize(size); + } + } + + private static class ReadOwnWritesTester extends Thread { + static final int NUM_TRIES = 1000; + + final byte[] row; + + final byte[] f = Bytes.toBytes("family"); + final byte[] q1 = Bytes.toBytes("q1"); + + final MultiVersionConsistencyControl mvcc; + final CompactedMemStore compmemstore; + final AtomicLong startSeqNum; + + AtomicReference caughtException; + + public ReadOwnWritesTester(int id, + CompactedMemStore memstore, + MultiVersionConsistencyControl mvcc, + AtomicReference caughtException, + AtomicLong startSeqNum) { + this.mvcc = mvcc; + this.compmemstore = memstore; + this.caughtException = caughtException; + row = Bytes.toBytes(id); + this.startSeqNum = startSeqNum; + } + + public void run() { + try { + internalRun(); + } catch (Throwable t) { + caughtException.compareAndSet(null, t); + } + } + + private void internalRun() throws IOException { + for (long i = 0; i < NUM_TRIES && caughtException.get() == null; i++) { + MultiVersionConsistencyControl.WriteEntry w = + mvcc.beginMemstoreInsert(); + + // Insert the sequence value (i) + byte[] v = Bytes.toBytes(i); + + KeyValue kv = new KeyValue(row, f, q1, i, v); + kv.setSequenceId(w.getWriteNumber()); + compmemstore.add(kv); + mvcc.completeMemstoreInsert(w); + + // Assert that we can read back + KeyValueScanner s = this.compmemstore.getScanners(mvcc.memstoreReadPoint()).get(0); + s.seek(kv); + + Cell ret = s.next(); + assertNotNull("Didnt find own write at all", ret); + assertEquals("Didnt read own writes", + kv.getTimestamp(), ret.getTimestamp()); + } + } + } + + private class EnvironmentEdgeForMemstoreTest implements EnvironmentEdge { + long t = 1234; + + @Override + public long currentTime() { + return t; + } + public void setCurrentTimeMillis(long t) { + this.t = t; + } + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java index 4848d66..93231fb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java @@ -18,17 +18,10 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; 
-import java.lang.management.ManagementFactory; -import java.lang.management.MemoryMXBean; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; - +import com.google.common.base.Joiner; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import junit.framework.TestCase; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -57,9 +50,14 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.wal.WALFactory; import org.junit.experimental.categories.Category; -import com.google.common.base.Joiner; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; /** memstore test case */ @Category({RegionServerTests.class, MediumTests.class}) @@ -86,11 +84,9 @@ public class TestDefaultMemStore extends TestCase { byte [] other = Bytes.toBytes("somethingelse"); KeyValue samekey = new KeyValue(bytes, bytes, bytes, other); this.memstore.add(samekey); - Cell found = this.memstore.cellSet.first(); - assertEquals(1, this.memstore.cellSet.size()); - assertTrue( - Bytes.toString(found.getValueArray(), found.getValueOffset(), found.getValueLength()), - CellUtil.matchingValue(samekey, found)); + Cell found = this.memstore.getActive().first(); + assertEquals(1, this.memstore.getActive().getCellsCount()); + assertTrue(Bytes.toString(found.getValueArray()), CellUtil.matchingValue(samekey, found)); } /** @@ -104,7 +100,7 @@ public class TestDefaultMemStore extends TestCase { List result = new ArrayList(); ScanInfo scanInfo = new ScanInfo(null, 0, 1, HConstants.LATEST_TIMESTAMP, KeepDeletedCells.FALSE, 0, - this.memstore.comparator); + this.memstore.getComparator()); ScanType scanType = ScanType.USER_SCAN; StoreScanner s = new StoreScanner(scan, scanInfo, scanType, null, memstorescanners); int count = 0; @@ -472,7 +468,7 @@ public class TestDefaultMemStore extends TestCase { for (int i = 0; i < snapshotCount; i++) { addRows(this.memstore); runSnapshot(this.memstore); - assertEquals("History not being cleared", 0, this.memstore.snapshot.size()); + assertEquals("History not being cleared", 0, this.memstore.getSnapshot().getCellsCount()); } } @@ -493,7 +489,7 @@ public class TestDefaultMemStore extends TestCase { m.add(key2); assertTrue("Expected memstore to hold 3 values, actually has " + - m.cellSet.size(), m.cellSet.size() == 3); + m.getActive().getCellsCount(), m.getActive().getCellsCount() == 3); } ////////////////////////////////////////////////////////////////////////////// @@ -524,7 +520,7 @@ public class TestDefaultMemStore extends TestCase { //starting from each row, validate results should contain the starting row for (int startRowId = 0; startRowId < ROW_COUNT; startRowId++) { ScanInfo scanInfo = new ScanInfo(FAMILY, 0, 1, Integer.MAX_VALUE, KeepDeletedCells.FALSE, - 0, this.memstore.comparator); + 0, this.memstore.getComparator()); ScanType scanType = ScanType.USER_SCAN; InternalScanner scanner = new StoreScanner(new Scan( Bytes.toBytes(startRowId)), scanInfo, scanType, null, @@ -565,12 +561,12 @@ public class 
TestDefaultMemStore extends TestCase { memstore.add(new KeyValue(row, fam ,qf3, val)); //Creating a snapshot memstore.snapshot(); - assertEquals(3, memstore.snapshot.size()); + assertEquals(3, memstore.getSnapshot().getCellsCount()); //Adding value to "new" memstore - assertEquals(0, memstore.cellSet.size()); + assertEquals(0, memstore.getActive().getCellsCount()); memstore.add(new KeyValue(row, fam ,qf4, val)); memstore.add(new KeyValue(row, fam ,qf5, val)); - assertEquals(2, memstore.cellSet.size()); + assertEquals(2, memstore.getActive().getCellsCount()); } ////////////////////////////////////////////////////////////////////////////// @@ -592,7 +588,7 @@ public class TestDefaultMemStore extends TestCase { memstore.add(put2); memstore.add(put3); - assertEquals(3, memstore.cellSet.size()); + assertEquals(3, memstore.getActive().getCellsCount()); KeyValue del2 = new KeyValue(row, fam, qf1, ts2, KeyValue.Type.Delete, val); memstore.delete(del2); @@ -603,9 +599,9 @@ public class TestDefaultMemStore extends TestCase { expected.add(put2); expected.add(put1); - assertEquals(4, memstore.cellSet.size()); + assertEquals(4, memstore.getActive().getCellsCount()); int i = 0; - for(Cell cell : memstore.cellSet) { + for(Cell cell : memstore.getActive().getCellSet()) { assertEquals(expected.get(i++), cell); } } @@ -626,7 +622,7 @@ public class TestDefaultMemStore extends TestCase { memstore.add(put2); memstore.add(put3); - assertEquals(3, memstore.cellSet.size()); + assertEquals(3, memstore.getActive().getCellsCount()); KeyValue del2 = new KeyValue(row, fam, qf1, ts2, KeyValue.Type.DeleteColumn, val); @@ -639,9 +635,9 @@ public class TestDefaultMemStore extends TestCase { expected.add(put1); - assertEquals(4, memstore.cellSet.size()); + assertEquals(4, memstore.getActive().getCellsCount()); int i = 0; - for (Cell cell: memstore.cellSet) { + for (Cell cell: memstore.getActive().getCellSet()) { assertEquals(expected.get(i++), cell); } } @@ -679,9 +675,9 @@ public class TestDefaultMemStore extends TestCase { - assertEquals(5, memstore.cellSet.size()); + assertEquals(5, memstore.getActive().getCellsCount()); int i = 0; - for (Cell cell: memstore.cellSet) { + for (Cell cell: memstore.getActive().getCellSet()) { assertEquals(expected.get(i++), cell); } } @@ -695,8 +691,8 @@ public class TestDefaultMemStore extends TestCase { memstore.add(new KeyValue(row, fam, qf, ts, val)); KeyValue delete = new KeyValue(row, fam, qf, ts, KeyValue.Type.Delete, val); memstore.delete(delete); - assertEquals(2, memstore.cellSet.size()); - assertEquals(delete, memstore.cellSet.first()); + assertEquals(2, memstore.getActive().getCellsCount()); + assertEquals(delete, memstore.getActive().first()); } public void testRetainsDeleteVersion() throws IOException { @@ -708,8 +704,8 @@ public class TestDefaultMemStore extends TestCase { "row1", "fam", "a", 100, KeyValue.Type.Delete, "dont-care"); memstore.delete(delete); - assertEquals(2, memstore.cellSet.size()); - assertEquals(delete, memstore.cellSet.first()); + assertEquals(2, memstore.getActive().getCellsCount()); + assertEquals(delete, memstore.getActive().first()); } public void testRetainsDeleteColumn() throws IOException { // add a put to memstore @@ -720,8 +716,8 @@ public class TestDefaultMemStore extends TestCase { KeyValue.Type.DeleteColumn, "dont-care"); memstore.delete(delete); - assertEquals(2, memstore.cellSet.size()); - assertEquals(delete, memstore.cellSet.first()); + assertEquals(2, memstore.getActive().getCellsCount()); + assertEquals(delete, 
memstore.getActive().first()); } public void testRetainsDeleteFamily() throws IOException { // add a put to memstore @@ -732,8 +728,8 @@ public class TestDefaultMemStore extends TestCase { KeyValue.Type.DeleteFamily, "dont-care"); memstore.delete(delete); - assertEquals(2, memstore.cellSet.size()); - assertEquals(delete, memstore.cellSet.first()); + assertEquals(2, memstore.getActive().getCellsCount()); + assertEquals(delete, memstore.getActive().first()); } //////////////////////////////////// @@ -787,7 +783,7 @@ public class TestDefaultMemStore extends TestCase { */ public void testUpsertMSLAB() throws Exception { Configuration conf = HBaseConfiguration.create(); - conf.setBoolean(DefaultMemStore.USEMSLAB_KEY, true); + conf.setBoolean(MemStoreSegment.USEMSLAB_KEY, true); memstore = new DefaultMemStore(conf, CellComparator.COMPARATOR); int ROW_SIZE = 2048; @@ -830,7 +826,7 @@ public class TestDefaultMemStore extends TestCase { public void testUpsertMemstoreSize() throws Exception { Configuration conf = HBaseConfiguration.create(); memstore = new DefaultMemStore(conf, CellComparator.COMPARATOR); - long oldSize = memstore.size.get(); + long oldSize = memstore.size(); List l = new ArrayList(); KeyValue kv1 = KeyValueTestUtil.create("r", "f", "q", 100, "v"); @@ -841,18 +837,18 @@ public class TestDefaultMemStore extends TestCase { l.add(kv1); l.add(kv2); l.add(kv3); this.memstore.upsert(l, 2);// readpoint is 2 - long newSize = this.memstore.size.get(); + long newSize = this.memstore.size(); assert(newSize > oldSize); //The kv1 should be removed. - assert(memstore.cellSet.size() == 2); - + assert(memstore.getActive().getCellsCount() == 2); + KeyValue kv4 = KeyValueTestUtil.create("r", "f", "q", 104, "v"); kv4.setSequenceId(1); l.clear(); l.add(kv4); this.memstore.upsert(l, 3); - assertEquals(newSize, this.memstore.size.get()); + assertEquals(newSize, this.memstore.size()); //The kv2 should be removed. - assert(memstore.cellSet.size() == 2); + assert(memstore.getActive().getCellsCount() == 2); //this.memstore = null; } @@ -1013,10 +1009,10 @@ public class TestDefaultMemStore extends TestCase { private long runSnapshot(final DefaultMemStore hmc) throws UnexpectedStateException { // Save off old state. - int oldHistorySize = hmc.snapshot.size(); + int oldHistorySize = hmc.getSnapshot().getCellsCount(); MemStoreSnapshot snapshot = hmc.snapshot(); // Make some assertions about what just happened. 
- assertTrue("History size has not increased", oldHistorySize < hmc.snapshot.size()); + assertTrue("History size has not increased", oldHistorySize < hmc.getSnapshot().getCellsCount()); long t = memstore.timeOfOldestEdit(); assertTrue("Time of oldest edit is not Long.MAX_VALUE", t == Long.MAX_VALUE); hmc.clearSnapshot(snapshot.getId()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java index 4ed918c..266f89b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java @@ -18,25 +18,10 @@ */ package org.apache.hadoop.hbase.regionserver; -import java.io.IOException; -import java.security.Key; -import java.security.SecureRandom; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Date; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableSet; -import java.util.concurrent.ConcurrentSkipListSet; - -import javax.crypto.spec.SecretKeySpec; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.HarFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; @@ -58,9 +43,7 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.mob.MobConstants; import org.apache.hadoop.hbase.mob.MobUtils; import org.apache.hadoop.hbase.monitoring.MonitoredTask; -import org.apache.hadoop.hbase.regionserver.StoreFile.Reader; import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController; import org.apache.hadoop.hbase.security.EncryptionUtil; import org.apache.hadoop.hbase.security.User; @@ -76,6 +59,19 @@ import org.junit.experimental.categories.Category; import org.junit.rules.TestName; import org.mockito.Mockito; +import javax.crypto.spec.SecretKeySpec; +import java.io.IOException; +import java.security.Key; +import java.security.SecureRandom; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableSet; +import java.util.concurrent.ConcurrentSkipListSet; + @Category(MediumTests.class) public class TestHMobStore { public static final Log LOG = LogFactory.getLog(TestHMobStore.class); @@ -469,7 +465,7 @@ public class TestHMobStore { this.store.snapshot(); flushStore(store, id++); Assert.assertEquals(storeFilesSize, this.store.getStorefiles().size()); - Assert.assertEquals(0, ((DefaultMemStore)this.store.memstore).cellSet.size()); + Assert.assertEquals(0, ((DefaultMemStore)this.store.memstore).getActive().getCellsCount()); } /** diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index 56a9d4b..07294cb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -18,50 +18,10 @@ */ package 
org.apache.hadoop.hbase.regionserver; - -import static org.apache.hadoop.hbase.HBaseTestingUtility.COLUMNS; -import static org.apache.hadoop.hbase.HBaseTestingUtility.FIRST_CHAR; -import static org.apache.hadoop.hbase.HBaseTestingUtility.LAST_CHAR; -import static org.apache.hadoop.hbase.HBaseTestingUtility.START_KEY; -import static org.apache.hadoop.hbase.HBaseTestingUtility.fam1; -import static org.apache.hadoop.hbase.HBaseTestingUtility.fam2; -import static org.apache.hadoop.hbase.HBaseTestingUtility.fam3; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyBoolean; -import static org.mockito.Matchers.anyLong; -import static org.mockito.Matchers.argThat; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.io.InterruptedIOException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.NavigableMap; -import java.util.TreeMap; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; - +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.protobuf.ByteString; import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -70,31 +30,10 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.CompatibilitySingletonFactory; -import org.apache.hadoop.hbase.DroppedSnapshotException; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HBaseTestCase; -import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.HConstants.OperationStatusCode; -import org.apache.hadoop.hbase.HDFSBlocksDistribution; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.MiniHBaseCluster; -import org.apache.hadoop.hbase.MultithreadedTestUtil; import org.apache.hadoop.hbase.MultithreadedTestUtil.RepeatingTestThread; import org.apache.hadoop.hbase.MultithreadedTestUtil.TestThread; -import org.apache.hadoop.hbase.NotServingRegionException; -import org.apache.hadoop.hbase.RegionTooBusyException; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.TableName; -import 
org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.client.Append;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Durability;
@@ -165,10 +104,31 @@
 import org.mockito.ArgumentCaptor;
 import org.mockito.ArgumentMatcher;
 import org.mockito.Mockito;
 
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.protobuf.ByteString;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.apache.hadoop.hbase.HBaseTestingUtility.*;
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyBoolean;
+import static org.mockito.Matchers.anyLong;
+import static org.mockito.Matchers.argThat;
+import static org.mockito.Mockito.*;
 
 /**
  * Basic stand-alone testing of HRegion. No clusters!
@@ -2343,10 +2303,10 @@ public class TestHRegion {
     // This is kinda hacky, but better than nothing...
     long now = System.currentTimeMillis();
     DefaultMemStore memstore = (DefaultMemStore) ((HStore) region.getStore(fam1)).memstore;
-    Cell firstCell = memstore.cellSet.first();
+    Cell firstCell = ((HStore) region.getStore(fam1)).memstore.getActive().first();
     assertTrue(firstCell.getTimestamp() <= now);
     now = firstCell.getTimestamp();
-    for (Cell cell : memstore.cellSet) {
+    for (Cell cell : memstore.getActive().getCellSet()) {
       assertTrue(cell.getTimestamp() <= now);
       now = cell.getTimestamp();
     }
@@ -5916,7 +5876,7 @@ public class TestHRegion {
   public void testOpenRegionWrittenToWALForLogReplay() throws Exception {
     // similar to the above test but with distributed log replay
     final ServerName serverName = ServerName.valueOf("testOpenRegionWrittenToWALForLogReplay",
-        100, 42);
+      100, 42);
     final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName));
     HTableDescriptor htd
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreChunkPool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreChunkPool.java
index 80333e8..af33bc6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreChunkPool.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreChunkPool.java
@@ -18,12 +18,6 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.util.List;
-import java.util.Random;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
@@ -36,6 +30,13 @@
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import java.io.IOException;
+import java.util.List;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 /**
  * Test the {@link MemStoreChunkPool} class
  */
@@ -47,7 +48,7 @@ public class TestMemStoreChunkPool {
 
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
-    conf.setBoolean(DefaultMemStore.USEMSLAB_KEY, true);
+    conf.setBoolean(MemStoreSegment.USEMSLAB_KEY, true);
     conf.setFloat(MemStoreChunkPool.CHUNK_POOL_MAXSIZE_KEY, 0.2f);
     chunkPoolDisabledBeforeTest = MemStoreChunkPool.chunkPoolDisabled;
     MemStoreChunkPool.chunkPoolDisabled = false;
@@ -116,13 +117,13 @@ public class TestMemStoreChunkPool {
 
     // Creating a snapshot
     MemStoreSnapshot snapshot = memstore.snapshot();
-    assertEquals(3, memstore.snapshot.size());
+    assertEquals(3, memstore.getSnapshot().getCellsCount());
 
     // Adding value to "new" memstore
-    assertEquals(0, memstore.cellSet.size());
+    assertEquals(0, memstore.getActive().getCellsCount());
     memstore.add(new KeyValue(row, fam, qf4, val));
     memstore.add(new KeyValue(row, fam, qf5, val));
-    assertEquals(2, memstore.cellSet.size());
+    assertEquals(2, memstore.getActive().getCellsCount());
     memstore.clearSnapshot(snapshot.getId());
 
     int chunkCount = chunkPool.getPoolSize();
@@ -132,7 +133,7 @@ public class TestMemStoreChunkPool {
 
   @Test
   public void testPuttingBackChunksWithOpeningScanner()
-      throws UnexpectedStateException {
+      throws IOException {
     byte[] row = Bytes.toBytes("testrow");
     byte[] fam = Bytes.toBytes("testfamily");
     byte[] qf1 = Bytes.toBytes("testqualifier1");
@@ -153,13 +154,13 @@ public class TestMemStoreChunkPool {
 
     // Creating a snapshot
     MemStoreSnapshot snapshot = memstore.snapshot();
-    assertEquals(3, memstore.snapshot.size());
+    assertEquals(3, memstore.getSnapshot().getCellsCount());
 
     // Adding value to "new" memstore
-    assertEquals(0, memstore.cellSet.size());
+    assertEquals(0, memstore.getActive().getCellsCount());
     memstore.add(new KeyValue(row, fam, qf4, val));
     memstore.add(new KeyValue(row, fam, qf5, val));
-    assertEquals(2, memstore.cellSet.size());
+    assertEquals(2, memstore.getActive().getCellsCount());
 
     // opening scanner before clear the snapshot
     List scanners = memstore.getScanners(0);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
index a1a58bc..94d00b2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
@@ -19,25 +19,7 @@
 
 package org.apache.hadoop.hbase.regionserver;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.spy;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
-
-import java.io.IOException;
-import java.lang.ref.SoftReference;
-import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.NavigableSet;
-import java.util.concurrent.ConcurrentSkipListSet;
-import java.util.concurrent.atomic.AtomicBoolean;
+import com.google.common.collect.Lists;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -73,8 +55,6 @@
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
 import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor;
 import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController;
-import org.apache.hadoop.hbase.wal.DefaultWALProvider;
-import org.apache.hadoop.hbase.wal.WALFactory;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -82,6 +62,8 @@
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.IncrementingEnvironmentEdge;
 import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
+import org.apache.hadoop.hbase.wal.DefaultWALProvider;
+import org.apache.hadoop.hbase.wal.WALFactory;
 import org.apache.hadoop.util.Progressable;
 import org.junit.After;
 import org.junit.Assert;
@@ -92,7 +74,21 @@
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
 import org.mockito.Mockito;
 
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.lang.ref.SoftReference;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NavigableSet;
+import java.util.concurrent.ConcurrentSkipListSet;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.*;
 
 /**
  * Test class for the Store
@@ -555,7 +551,7 @@ public class TestStore {
     this.store.snapshot();
     flushStore(store, id++);
     Assert.assertEquals(storeFilessize, this.store.getStorefiles().size());
-    Assert.assertEquals(0, ((DefaultMemStore)this.store.memstore).cellSet.size());
+    Assert.assertEquals(0, ((DefaultMemStore)this.store.memstore).getActive().getCellsCount());
   }
 
   private void assertCheck() {
@@ -600,7 +596,7 @@ public class TestStore {
     flushStore(store, id++);
     Assert.assertEquals(1, this.store.getStorefiles().size());
     // from the one we inserted up there, and a new one
-    Assert.assertEquals(2, ((DefaultMemStore)this.store.memstore).cellSet.size());
+    Assert.assertEquals(2, ((DefaultMemStore)this.store.memstore).getActive().getCellsCount());
 
     // how many key/values for this row are there?
     Get get = new Get(row);
@@ -674,7 +670,7 @@ public class TestStore {
     }
 
     long computedSize=0;
-    for (Cell cell : ((DefaultMemStore)this.store.memstore).cellSet) {
+    for (Cell cell : ((DefaultMemStore)this.store.memstore).getActive().getCellSet()) {
       long kvsize = DefaultMemStore.heapSizeChange(cell, true);
       //System.out.println(kv + " size= " + kvsize + " kvsize= " + kv.heapSize());
       computedSize += kvsize;
@@ -706,7 +702,7 @@ public class TestStore {
     // then flush.
     flushStore(store, id++);
     Assert.assertEquals(1, this.store.getStorefiles().size());
-    Assert.assertEquals(1, ((DefaultMemStore)this.store.memstore).cellSet.size());
+    Assert.assertEquals(1, ((DefaultMemStore)this.store.memstore).getActive().getCellsCount());
 
     // now increment again:
     newValue += 1;
-- 
1.7.10.2 (Apple Git-33)
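
For reference, the test changes above all follow one migration pattern: instead of reaching into DefaultMemStore's internal fields (memstore.cellSet, memstore.snapshot), callers go through the segment accessors this patch introduces. Below is a minimal sketch of the resulting usage, assuming it lives in the org.apache.hadoop.hbase.regionserver package as the tests do; getActive(), getSnapshot(), getCellsCount(), getCellSet(), snapshot() and clearSnapshot() are taken from the diffs, while the wrapper class, the demo method and its sample data are hypothetical.

package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

// Sketch only: mirrors the post-patch accessor usage seen in the test diffs.
public class MemStoreAccessorSketch {

  // `memstore` is assumed to be constructed the way the tests above build theirs.
  static void demoSegmentAccessors(DefaultMemStore memstore) throws IOException {
    byte[] row = Bytes.toBytes("row");
    byte[] fam = Bytes.toBytes("fam");
    byte[] qf = Bytes.toBytes("qf");
    byte[] val = Bytes.toBytes("val");

    memstore.add(new KeyValue(row, fam, qf, val));

    // was: memstore.cellSet.size()
    assertEquals(1, memstore.getActive().getCellsCount());

    // was: for (Cell cell : memstore.cellSet) { ... }
    for (Cell cell : memstore.getActive().getCellSet()) {
      assertTrue(cell.getTimestamp() > 0);
    }

    // Snapshotting moves the active segment's cells into the snapshot segment.
    MemStoreSnapshot snapshot = memstore.snapshot();
    // was: memstore.snapshot.size()
    assertEquals(1, memstore.getSnapshot().getCellsCount());
    assertEquals(0, memstore.getActive().getCellsCount());

    memstore.clearSnapshot(snapshot.getId());
  }
}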