From 519642144937808e6ee7e1de972103d461442e8c Mon Sep 17 00:00:00 2001 From: stack Date: Wed, 13 Apr 2016 21:49:28 -0700 Subject: [PATCH] HBASE-15650 Remove TimeRangeTracker as point of contention when many threads reading a StoreFile Refactor so we use the immutable, unsynchronized TimeRange when doing time-based checks at read time rather than use heavily synchronized TimeRangeTracker; let TimeRangeTracker be for write-time only. M hbase-common/src/main/java/org/apache/hadoop/hbase/io/TimeRange.java Make allTime final. Add a includesTimeRange method copied from TimeRangeTracker. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java Change name of a few methods so they match TimeRange methods that do same thing. (getTimeRangeTracker, getTimeRange, toTimeRange) add utility methods M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Change Reader to use TimeRange-based checks instead of TimeRangeTracker. --- .../java/org/apache/hadoop/hbase/io/TimeRange.java | 25 +++++++---- .../hadoop/hbase/io/hfile/HFilePrettyPrinter.java | 3 +- .../hadoop/hbase/regionserver/DefaultMemStore.java | 7 ++-- .../hadoop/hbase/regionserver/StoreFile.java | 48 ++++++++-------------- .../hbase/regionserver/TimeRangeTracker.java | 48 ++++++++++++++++++---- .../hbase/mapreduce/TestHFileOutputFormat.java | 7 ++-- .../hbase/mapreduce/TestHFileOutputFormat2.java | 8 ++-- .../hadoop/hbase/regionserver/MockStoreFile.java | 8 ++-- .../hbase/regionserver/TestTimeRangeTracker.java | 12 +++--- 9 files changed, 95 insertions(+), 71 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/TimeRange.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/TimeRange.java index 4ec062d..a5f665c 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/TimeRange.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/TimeRange.java @@ -40,7 +40,7 @@ public class TimeRange { private static final long MAX_TIME = Long.MAX_VALUE; private long minStamp = MIN_TIME; private long maxStamp = MAX_TIME; - private boolean allTime = false; + private final boolean allTime; /** * Default constructor. @@ -60,9 +60,7 @@ public class TimeRange { @Deprecated public TimeRange(long minStamp) { this.minStamp = minStamp; - if (this.minStamp == MIN_TIME){ - this.allTime = true; - } + this.allTime = this.minStamp == MIN_TIME; } /** @@ -73,6 +71,7 @@ public class TimeRange { @Deprecated public TimeRange(byte [] minStamp) { this.minStamp = Bytes.toLong(minStamp); + this.allTime = false; } /** @@ -94,9 +93,7 @@ public class TimeRange { } this.minStamp = minStamp; this.maxStamp = maxStamp; - if (this.minStamp == MIN_TIME && this.maxStamp == MAX_TIME){ - this.allTime = true; - } + this.allTime = this.minStamp == MIN_TIME && this.maxStamp == MAX_TIME; } /** @@ -157,12 +154,24 @@ public class TimeRange { * @return true if within TimeRange, false if not */ public boolean withinTimeRange(long timestamp) { - if(allTime) return true; + if (allTime) return true; // check if >= minStamp return (minStamp <= timestamp && timestamp < maxStamp); } /** + * Check if the range has any overlap with TimeRange + * @param tr TimeRange + * @return True if there is overlap, false otherwise + */ + // This method came from TimeRangeTracker. We used to go there for this function but better + // to come here to the immutable, unsynchronized datastructure at read time. + public synchronized boolean includesTimeRange(final TimeRange tr) { + if (this.allTime) return true; + return getMin() < tr.getMax() && getMax() >= tr.getMin(); + } + + /** * Check if the specified timestamp is within this TimeRange. *

* Returns true if within interval [minStamp, maxStamp), false diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index f083f8d..a4dce65 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -401,8 +401,7 @@ public class HFilePrettyPrinter extends Configured implements Tool { } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) { TimeRangeTracker timeRangeTracker = new TimeRangeTracker(); Writables.copyWritable(e.getValue(), timeRangeTracker); - System.out.println(timeRangeTracker.getMinimumTimestamp() + "...." - + timeRangeTracker.getMaximumTimestamp()); + System.out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax()); } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) { System.out.println(Bytes.toInt(e.getValue())); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java index a3d4dfd..fe6d550 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java @@ -678,10 +678,9 @@ public class DefaultMemStore implements MemStore { timeRange = scan.getTimeRange(); } return (timeRangeTracker.includesTimeRange(timeRange) || - snapshotTimeRangeTracker.includesTimeRange(timeRange)) - && (Math.max(timeRangeTracker.getMaximumTimestamp(), - snapshotTimeRangeTracker.getMaximumTimestamp()) >= - oldestUnexpiredTS); + snapshotTimeRangeTracker.includesTimeRange(timeRange)) && + (Math.max(timeRangeTracker.getMax(), snapshotTimeRangeTracker.getMax()) >= + oldestUnexpiredTS); } /* diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index 8ffdfdd..d563672 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -18,12 +18,6 @@ */ package org.apache.hadoop.hbase.regionserver; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Ordering; - import java.io.DataInput; import java.io.IOException; import java.net.InetSocketAddress; @@ -66,9 +60,14 @@ import org.apache.hadoop.hbase.util.BloomFilter; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.BloomFilterWriter; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.io.WritableUtils; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Ordering; + /** * A Store data file. Stores usually have one or more of these files. They * are produced by flushing the memstore to disk. To @@ -503,15 +502,11 @@ public class StoreFile { reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META); try { - byte [] timerangeBytes = metadataMap.get(TIMERANGE_KEY); - if (timerangeBytes != null) { - this.reader.timeRangeTracker = new TimeRangeTracker(); - Writables.copyWritable(timerangeBytes, this.reader.timeRangeTracker); - } + this.reader.timeRange = TimeRangeTracker.getTimeRange(metadataMap.get(TIMERANGE_KEY)); } catch (IllegalArgumentException e) { LOG.error("Error reading timestamp range data from meta -- " + "proceeding without", e); - this.reader.timeRangeTracker = null; + this.reader.timeRange = null; } // initialize so we can reuse them after reader closed. firstKey = reader.getFirstKey(); @@ -535,7 +530,7 @@ public class StoreFile { } catch (IOException e) { try { boolean evictOnClose = - cacheConf != null? cacheConf.shouldEvictOnClose(): true; + cacheConf != null? cacheConf.shouldEvictOnClose(): true; this.closeReader(evictOnClose); } catch (IOException ee) { } @@ -581,7 +576,7 @@ public class StoreFile { */ public void deleteReader() throws IOException { boolean evictOnClose = - cacheConf != null? cacheConf.shouldEvictOnClose(): true; + cacheConf != null? cacheConf.shouldEvictOnClose(): true; closeReader(evictOnClose); this.fs.delete(getPath(), true); } @@ -743,15 +738,11 @@ public class StoreFile { } public Long getMinimumTimestamp() { - return (getReader().timeRangeTracker == null) ? - null : - getReader().timeRangeTracker.getMinimumTimestamp(); + return getReader().timeRange == null? null: getReader().timeRange.getMin(); } public Long getMaximumTimestamp() { - return (getReader().timeRangeTracker == null) ? - null : - getReader().timeRangeTracker.getMaximumTimestamp(); + return getReader().timeRange == null? null: getReader().timeRange.getMax(); } @@ -1108,12 +1099,11 @@ public class StoreFile { */ public static class Reader { private static final Log LOG = LogFactory.getLog(Reader.class.getName()); - protected BloomFilter generalBloomFilter = null; protected BloomFilter deleteFamilyBloomFilter = null; protected BloomType bloomFilterType; private final HFile.Reader reader; - protected TimeRangeTracker timeRangeTracker = null; + protected TimeRange timeRange = null; protected long sequenceID = -1; private byte[] lastBloomKey; private long deleteFamilyCnt = -1; @@ -1207,7 +1197,7 @@ public class StoreFile { public boolean isReferencedInReads() { return refCount.get() != 0; } - + /** * @return true if the file is compacted */ @@ -1261,12 +1251,8 @@ public class StoreFile { * @return false if queried keys definitely don't exist in this StoreFile */ boolean passesTimerangeFilter(TimeRange timeRange, long oldestUnexpiredTS) { - if (timeRangeTracker == null) { - return true; - } else { - return timeRangeTracker.includesTimeRange(timeRange) && - timeRangeTracker.getMaximumTimestamp() >= oldestUnexpiredTS; - } + return timeRange == null? true: + timeRange.includesTimeRange(timeRange) && timeRange.getMax() >= oldestUnexpiredTS; } /** @@ -1666,7 +1652,7 @@ public class StoreFile { } public long getMaxTimestamp() { - return timeRangeTracker == null ? Long.MAX_VALUE : timeRangeTracker.getMaximumTimestamp(); + return timeRange == null ? Long.MAX_VALUE : timeRange.getMax(); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java index beadde6..882cef1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java @@ -26,14 +26,16 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.TimeRange; +import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.io.Writable; /** - * Stores the minimum and maximum timestamp values (both are inclusive). - * Can be used to find if any given time range overlaps with its time range - * MemStores use this class to track its minimum and maximum timestamps. + * Stores minimum and maximum timestamp values. Both timestamps are inclusive. + * Use this class at write- time ONLY. Too much synchronization to use at read time. + * Use {@link TimeRange} at read time. See toTimeRange() to make TimeRange to use. + * MemStores use this class to track minimum and maximum timestamps. * When writing StoreFiles, this information is stored in meta blocks and used - * at read time to match against the required TimeRange. + * at read time via an instance of {@link TimeRange} to test if Cells fit the StoreFile TimeRange. */ @InterfaceAudience.Private public class TimeRangeTracker implements Writable { @@ -52,7 +54,7 @@ public class TimeRangeTracker implements Writable { * @param trt source TimeRangeTracker */ public TimeRangeTracker(final TimeRangeTracker trt) { - set(trt.getMinimumTimestamp(), trt.getMaximumTimestamp()); + set(trt.getMin(), trt.getMax()); } public TimeRangeTracker(long minimumTimestamp, long maximumTimestamp) { @@ -75,7 +77,7 @@ public class TimeRangeTracker implements Writable { } /** - * Update the current TimestampRange to include the timestamp from Cell + * Update the current TimestampRange to include the timestamp from cell. * If the Key is of type DeleteColumn or DeleteFamily, it includes the * entire time range from 0 to timestamp of the key. * @param cell the Cell to include @@ -128,14 +130,14 @@ public class TimeRangeTracker implements Writable { /** * @return the minimumTimestamp */ - public synchronized long getMinimumTimestamp() { + public synchronized long getMin() { return minimumTimestamp; } /** * @return the maximumTimestamp */ - public synchronized long getMaximumTimestamp() { + public synchronized long getMax() { return maximumTimestamp; } @@ -153,4 +155,34 @@ public class TimeRangeTracker implements Writable { public synchronized String toString() { return "[" + minimumTimestamp + "," + maximumTimestamp + "]"; } + + /** + * @return An instance of TimeRangeTracker filled w/ the content of serialized + * TimeRangeTracker in timeRangeTrackerBytes. + * @throws IOException + */ + public static TimeRangeTracker getTimeRangeTracker(final byte [] timeRangeTrackerBytes) + throws IOException { + if (timeRangeTrackerBytes == null) return null; + TimeRangeTracker trt = new TimeRangeTracker(); + Writables.copyWritable(timeRangeTrackerBytes, trt); + return trt; + } + + /** + * @return An instance of a TimeRange made from the serialized TimeRangeTracker passed in + * timeRangeTrackerBytes. + * @throws IOException + */ + static TimeRange getTimeRange(final byte [] timeRangeTrackerBytes) throws IOException { + TimeRangeTracker trt = getTimeRangeTracker(timeRangeTrackerBytes); + return trt == null? null: trt.toTimeRange(); + } + + /** + * @return Make a TimeRange from current state of this. + */ + TimeRange toTimeRange() throws IOException { + return new TimeRange(getMin(), getMax()); + } } \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java index 4c8bdc2..a6eef1b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java @@ -286,10 +286,9 @@ public class TestHFileOutputFormat { // unmarshall and check values. TimeRangeTracker timeRangeTracker = new TimeRangeTracker(); Writables.copyWritable(range, timeRangeTracker); - LOG.info(timeRangeTracker.getMinimumTimestamp() + - "...." + timeRangeTracker.getMaximumTimestamp()); - assertEquals(1000, timeRangeTracker.getMinimumTimestamp()); - assertEquals(2000, timeRangeTracker.getMaximumTimestamp()); + LOG.info(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax()); + assertEquals(1000, timeRangeTracker.getMin()); + assertEquals(2000, timeRangeTracker.getMax()); rd.close(); } finally { if (writer != null && context != null) writer.close(context); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java index 17d5df5..757e938 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java @@ -293,10 +293,10 @@ public class TestHFileOutputFormat2 { // unmarshall and check values. TimeRangeTracker timeRangeTracker = new TimeRangeTracker(); Writables.copyWritable(range, timeRangeTracker); - LOG.info(timeRangeTracker.getMinimumTimestamp() + - "...." + timeRangeTracker.getMaximumTimestamp()); - assertEquals(1000, timeRangeTracker.getMinimumTimestamp()); - assertEquals(2000, timeRangeTracker.getMaximumTimestamp()); + LOG.info(timeRangeTracker.getMin() + + "...." + timeRangeTracker.getMax()); + assertEquals(1000, timeRangeTracker.getMin()); + assertEquals(2000, timeRangeTracker.getMax()); rd.close(); } finally { if (writer != null && context != null) writer.close(context); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java index 9663426..70623e9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java @@ -112,12 +112,12 @@ public class MockStoreFile extends StoreFile { public Long getMinimumTimestamp() { return (timeRangeTracker == null) ? - null : timeRangeTracker.getMinimumTimestamp(); + null : timeRangeTracker.getMin(); } public Long getMaximumTimestamp() { return (timeRangeTracker == null) ? - null : timeRangeTracker.getMaximumTimestamp(); + null : timeRangeTracker.getMax(); } @Override @@ -133,7 +133,7 @@ public class MockStoreFile extends StoreFile { @Override public StoreFile.Reader getReader() { final long len = this.length; - final TimeRangeTracker timeRange = this.timeRangeTracker; + final TimeRangeTracker timeRangeTracker = this.timeRangeTracker; final long entries = this.entryCount; return new StoreFile.Reader() { @Override @@ -143,7 +143,7 @@ public class MockStoreFile extends StoreFile { @Override public long getMaxTimestamp() { - return timeRange == null ? Long.MAX_VALUE : timeRange.maximumTimestamp; + return timeRange == null? Long.MAX_VALUE: timeRangeTracker.getMax(); } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTimeRangeTracker.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTimeRangeTracker.java index 963a9e4..815622a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTimeRangeTracker.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTimeRangeTracker.java @@ -36,8 +36,8 @@ public class TestTimeRangeTracker { trr.includeTimestamp(3); trr.includeTimestamp(2); trr.includeTimestamp(1); - assertTrue(trr.getMinimumTimestamp() != TimeRangeTracker.INITIAL_MINIMUM_TIMESTAMP); - assertTrue(trr.getMaximumTimestamp() != -1 /*The initial max value*/); + assertTrue(trr.getMin() != TimeRangeTracker.INITIAL_MINIMUM_TIMESTAMP); + assertTrue(trr.getMax() != -1 /*The initial max value*/); } @Test @@ -80,8 +80,8 @@ public class TestTimeRangeTracker { for (int i = 0; i < threads.length; i++) { threads[i].join(); } - assertTrue(trr.getMaximumTimestamp() == calls * threadCount); - assertTrue(trr.getMinimumTimestamp() == 0); + assertTrue(trr.getMax() == calls * threadCount); + assertTrue(trr.getMin() == 0); } @Test @@ -141,7 +141,7 @@ public class TestTimeRangeTracker { for (int i = 0; i < threads.length; i++) { threads[i].join(); } - System.out.println(trr.getMinimumTimestamp() + " " + trr.getMaximumTimestamp() + " " + + System.out.println(trr.getMin() + " " + trr.getMax() + " " + (System.currentTimeMillis() - start)); } -} +} \ No newline at end of file -- 2.6.1