From 58e1c3fc791daa17425fbf2acbc8c5e56a73b5ad Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 9 May 2018 12:15:51 -0700 Subject: [PATCH] HBASE-20483 [PERFORMANCE] Flushing is 2x slower in hbase2. Test of flush for 1.4. --- .../apache/hadoop/hbase/regionserver/HRegion.java | 2 +- .../apache/hadoop/hbase/regionserver/HStore.java | 4 +- .../hbase/regionserver/TestFlushPerformance.java | 163 +++++++++++++++++++++ 3 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFlushPerformance.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index e95a3929ab..1f078e0e37 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -287,7 +287,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // TODO: account for each registered handler in HeapSize computation private Map coprocessorServiceHandlers = Maps.newHashMap(); - private final AtomicLong memstoreSize = new AtomicLong(0); + final AtomicLong memstoreSize = new AtomicLong(0); // Debug possible data loss due to WAL off final Counter numMutationsWithoutWAL = new Counter(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 804da38d6f..cbb40455f0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -129,7 +129,9 @@ public class HStore implements Store { private static final Log LOG = LogFactory.getLog(HStore.class); - protected final MemStore memstore; + @VisibleForTesting + final MemStore memstore; + // This stores directory in the filesystem. private final HRegion region; private final HColumnDescriptor family; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFlushPerformance.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFlushPerformance.java new file mode 100644 index 0000000000..1f023928ab --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFlushPerformance.java @@ -0,0 +1,163 @@ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CancelableProgressable; +import org.apache.hadoop.hbase.wal.WAL; +import org.apache.hadoop.hbase.wal.WALFactory; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Random; + +import static org.junit.Assert.assertTrue; + + +/** + * Test with a main that is about loading a region and then re-using the load just to profile + * region flush rate. Needed because hbase2 is slower than hbase1 flushing. + */ +@Category(LargeTests.class) +public class TestFlushPerformance { + @Rule public TestName name = new TestName(); + + private static final Logger LOG = LoggerFactory.getLogger(TestFlushPerformance.class); + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static WALFactory WALFACTORY; + private static final Random RANDOM = new Random(); + private static final String FAMILY_NAME = "cf"; + private static final byte [] FAMILY_NAME_BYTES = Bytes.toBytes(FAMILY_NAME); + private static final int TOTAL_ROWS = 400000; + private static final int COLUMNS = 10; + private static final byte [] VALUE = Bytes.toBytes("Some old value"); + + @BeforeClass + public static void beforeClass() throws IOException { + // Make it so we use the DisableWALProvider; i.e. no WAL + // TODO: Doc. how to get a disabled WAL. Too hard to figure currently. + Configuration conf = TEST_UTIL.getConfiguration(); + conf.setBoolean("hbase.regionserver.hlog.enabled", false); + assertTrue(!conf.getBoolean("hbase.regionserver.hlog.enabled", true)); + WALFACTORY = new WALFactory(conf,null, TestFlushPerformance.class.getSimpleName()); + } + + @Test + public void testFlush() throws IOException { + HRegion region = getRegion(TEST_UTIL.getConfiguration(), this.name.getMethodName()); + load(region, TOTAL_ROWS); + long dataSize = region.getMemstoreSize(); + try { + for (;;) { + DefaultMemStore.Section activeSection = + ((DefaultMemStore)((HStore)region.getStore(FAMILY_NAME_BYTES)).memstore).activeSection; + HRegion.FlushResult flushResult = region.flush(true); + assertTrue(flushResult.isFlushSucceeded()); + // Put the reference back in place to see if we can flush again. Restore datasize too. + ((DefaultMemStore)((HStore)region.getStore(FAMILY_NAME_BYTES)).memstore).activeSection = + activeSection; + region.memstoreSize.set(dataSize); + } + } finally { + region.close(); + } + } + + private HRegion getRegion(Configuration conf, String tableName) throws IOException { + HColumnDescriptor hcd = new HColumnDescriptor(FAMILY_NAME); + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName)); + htd.setMemStoreFlushSize(Long.MAX_VALUE/1024/*Big number to stop auto-flushing!*/); + htd.addFamily(hcd); + RegionServerServices rss = Mockito.mock(RegionServerServices.class); + Mockito.when(rss.getServerName()).thenReturn(ServerName.valueOf("serverName", 0, 0)); + FlushRequester flushRequester = Mockito.mock(FlushRequester.class); + Mockito.when(rss.getFlushRequester()).thenReturn(flushRequester); + HRegionInfo ri = new HRegionInfo(htd.getTableName()); + Path tableDir = TEST_UTIL.getDataTestDir(); + HRegionFileSystem rfs = new HRegionFileSystem(conf, FileSystem.get(conf), tableDir, ri); + WAL wal = WALFACTORY.getWAL(ri.getEncodedNameAsBytes(), ri.getTable().getNamespace()); + HRegion region = new HRegion(rfs, wal, conf, htd, rss); + CancelableProgressable cancelableProgressable = Mockito.mock(CancelableProgressable.class); + return region.openHRegion(cancelableProgressable); + } + + private static void load(HRegion region, int totalRows) throws IOException { + for (int i = 0; i < totalRows; i++) { + Put put = new Put(getRandomRow(totalRows)); + for (int column = 0; column < COLUMNS; column++) { + byte[] qualifier = Bytes.toBytes("" + column + 1); + put.addColumn(FAMILY_NAME_BYTES, qualifier, VALUE); + } + region.put(put); + } + } + + // Below methods are stolen from PerformanceEvaluation. TODO: Move into hbase-common. + + private static byte [] getRandomRow(final int totalRows) { + return format(generateRandomRow(totalRows)); + } + + private static byte [] format(final int number) { + byte [] b = new byte[26]; + int d = Math.abs(number); + for (int i = b.length - 1; i >= 0; i--) { + b[i] = (byte)((d % 10) + '0'); + d /= 10; + } + return b; + } + + private static int generateRandomRow(int totalRows) { + return RANDOM.nextInt(Integer.MAX_VALUE) % totalRows; + } + + /* + * This method takes some time and is done inline uploading data. For + * example, doing the mapfile test, generation of the key and value + * consumes about 30% of CPU time. + * @return Generated random value to insert into a table cell. + */ + private static byte[] generateData(int length) { + byte [] b = new byte [length]; + int i; + + for(i = 0; i < (length-8); i += 8) { + b[i] = (byte) (65 + RANDOM.nextInt(26)); + b[i+1] = b[i]; + b[i+2] = b[i]; + b[i+3] = b[i]; + b[i+4] = b[i]; + b[i+5] = b[i]; + b[i+6] = b[i]; + b[i+7] = b[i]; + } + + byte a = (byte) (65 + RANDOM.nextInt(26)); + for(; i < length; i++) { + b[i] = a; + } + return b; + } + + public static void main(String args[]) { + org.junit.runner.JUnitCore.main(TestFlushPerformance.class.getName()); + } +} -- 2.16.3