From af86f742b125a1138ee9e8e2c0fa6e2b99cd0985 Mon Sep 17 00:00:00 2001 From: Mike Drob Date: Fri, 1 Jun 2018 20:59:50 -0500 Subject: [PATCH] HBASE-20674 Clean up SCR docs and Xsum code --- .../org/apache/hadoop/hbase/fs/HFileSystem.java | 51 ++++---- .../hadoop/hbase/regionserver/HRegionServer.java | 3 +- .../java/org/apache/hadoop/hbase/util/FSUtils.java | 81 ------------ .../{TestChecksum.java => BaseTestChecksum.java} | 143 ++++++++------------- .../hadoop/hbase/io/hfile/TestChecksumLocalFS.java | 55 ++++++++ .../hbase/io/hfile/TestChecksumWithHdfs.java | 61 +++++++++ src/main/asciidoc/_chapters/performance.adoc | 65 +++++++--- src/main/asciidoc/_chapters/schema_design.adoc | 34 +---- 8 files changed, 247 insertions(+), 246 deletions(-) rename hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/{TestChecksum.java => BaseTestChecksum.java} (77%) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksumLocalFS.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksumWithHdfs.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/HFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/HFileSystem.java index bc3d85e1f2..ee0b1c01e1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/HFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/HFileSystem.java @@ -76,43 +76,40 @@ public class HFileSystem extends FilterFileSystem { * checksum verfication in hbase, otherwise * delegate checksum verification to the FileSystem. */ - public HFileSystem(Configuration conf, boolean useHBaseChecksum) - throws IOException { - - // Create the default filesystem with checksum verification switched on. - // By default, any operation to this FilterFileSystem occurs on - // the underlying filesystem that has checksums switched on. + public HFileSystem(final Configuration conf, final boolean useHBaseChecksum) throws IOException { + // The default file system that is used by all FilterFileSystem operations will have + // checksum verification turned on. We'll create an additional FileSystem handle later that may + // have checksum verification disabled, and that one will be used by our internal hot paths. this.fs = FileSystem.get(conf); this.useHBaseChecksum = useHBaseChecksum; - fs.initialize(getDefaultUri(conf), conf); + this.fs.initialize(getDefaultUri(conf), conf); + + boolean localFS = this.fs instanceof LocalFileSystem; // disable checksum verification for local fileSystem, see HBASE-11218 - if (fs instanceof LocalFileSystem) { - fs.setWriteChecksum(false); - fs.setVerifyChecksum(false); + if (localFS) { + this.fs.setWriteChecksum(false); + this.fs.setVerifyChecksum(false); } addLocationsOrderInterceptor(conf); - // If hbase checksum verification is switched on, then create a new - // filesystem object that has cksum verification turned off. - // We will avoid verifying checksums in the fs client, instead do it - // inside of hbase. - // If this is the local file system hadoop has a bug where seeks - // do not go to the correct location if setVerifyChecksum(false) is called. - // This manifests itself in that incorrect data is read and HFileBlocks won't be able to read - // their header magic numbers. 
See HBASE-5885 - if (useHBaseChecksum && !(fs instanceof LocalFileSystem)) { - conf = new Configuration(conf); - conf.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true); - this.noChecksumFs = maybeWrapFileSystem(newInstanceFileSystem(conf), conf); - this.noChecksumFs.setVerifyChecksum(false); - } else { - this.noChecksumFs = maybeWrapFileSystem(fs, conf); - } - this.fs = maybeWrapFileSystem(this.fs, conf); + + // If HBase checksum verification is enabled, create a new FS instance with checksum + // verification turned off. We will avoid double-checking sums by skipping them in FS client + // and doing it only in HBase. + // LocalFS doesn't support SCR, so nothing left to configure in that case. + if (useHBaseChecksum && !localFS) { + Configuration checksumConf = new Configuration(conf); + // Not all Hadoop versions have HdfsClientConfigKeys.Read.ShortCircuit.SKIP_CHECKSUM_KEY + checksumConf.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true); + this.noChecksumFs = maybeWrapFileSystem(newInstanceFileSystem(checksumConf), checksumConf); + this.noChecksumFs.setVerifyChecksum(false); + } else { + this.noChecksumFs = this.fs; + } } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index aec94d4155..67234adbf9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -143,6 +143,7 @@ import org.apache.hadoop.hbase.trace.SpanReceiverHost; import org.apache.hadoop.hbase.trace.TraceUtil; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.CompressionTest; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSTableDescriptors; @@ -544,7 +545,7 @@ public class HRegionServer extends HasThread implements HFile.checkHFileVersion(this.conf); checkCodecs(this.conf); this.userProvider = UserProvider.instantiate(conf); - FSUtils.setupShortCircuitRead(this.conf); + CommonFSUtils.setupShortCircuitRead(this.conf); decorateRegionServerConfiguration(this.conf); // Disable usage of meta replicas in the regionserver diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java index 5b968db188..5d13b04110 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java @@ -129,36 +129,6 @@ public abstract class FSUtils extends CommonFSUtils { return fileSystem instanceof DistributedFileSystem; } - /** - * Compare path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c, it will compare the - * '/a/b/c' part. If you passed in 'hdfs://a/b/c and b/c, it would return true. Does not consider - * schema; i.e. if schemas different but path or subpath matches, the two will equate. - * @param pathToSearch Path we will be trying to match. 
- * @param pathTail - * @return True if pathTail is tail on the path of pathToSearch - */ - public static boolean isMatchingTail(final Path pathToSearch, final Path pathTail) { - if (pathToSearch.depth() != pathTail.depth()) return false; - Path tailPath = pathTail; - String tailName; - Path toSearch = pathToSearch; - String toSearchName; - boolean result = false; - do { - tailName = tailPath.getName(); - if (tailName == null || tailName.length() <= 0) { - result = true; - break; - } - toSearchName = toSearch.getName(); - if (toSearchName == null || toSearchName.length() <= 0) break; - // Move up a parent on each path for next go around. Path doesn't let us go off the end. - tailPath = tailPath.getParent(); - toSearch = toSearch.getParent(); - } while(tailName.equals(toSearchName)); - return result; - } - public static FSUtils getInstance(FileSystem fs, Configuration conf) { String scheme = fs.getUri().getScheme(); if (scheme == null) { @@ -172,21 +142,6 @@ public abstract class FSUtils extends CommonFSUtils { return fsUtils; } - /** - * Delete the region directory if exists. - * @param conf - * @param hri - * @return True if deleted the region directory. - * @throws IOException - */ - public static boolean deleteRegionDir(final Configuration conf, final HRegionInfo hri) - throws IOException { - Path rootDir = getRootDir(conf); - FileSystem fs = rootDir.getFileSystem(conf); - return deleteDirectory(fs, - new Path(getTableDir(rootDir, hri.getTable()), hri.getEncodedName())); - } - /** * Create the specified file on the filesystem. By default, this will: *
    @@ -1678,42 +1633,6 @@ public abstract class FSUtils extends CommonFSUtils { LOG.info(overheadMsg); } - /** - * Do our short circuit read setup. - * Checks buffer size to use and whether to do checksumming in hbase or hdfs. - * @param conf - */ - public static void setupShortCircuitRead(final Configuration conf) { - // Check that the user has not set the "dfs.client.read.shortcircuit.skip.checksum" property. - boolean shortCircuitSkipChecksum = - conf.getBoolean("dfs.client.read.shortcircuit.skip.checksum", false); - boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true); - if (shortCircuitSkipChecksum) { - LOG.warn("Configuration \"dfs.client.read.shortcircuit.skip.checksum\" should not " + - "be set to true." + (useHBaseChecksum ? " HBase checksum doesn't require " + - "it, see https://issues.apache.org/jira/browse/HBASE-6868." : "")); - assert !shortCircuitSkipChecksum; //this will fail if assertions are on - } - checkShortCircuitReadBufferSize(conf); - } - - /** - * Check if short circuit read buffer size is set and if not, set it to hbase value. - * @param conf - */ - public static void checkShortCircuitReadBufferSize(final Configuration conf) { - final int defaultSize = HConstants.DEFAULT_BLOCKSIZE * 2; - final int notSet = -1; - // DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY is only defined in h2 - final String dfsKey = "dfs.client.read.shortcircuit.buffer.size"; - int size = conf.getInt(dfsKey, notSet); - // If a size is set, return -- we will use it. - if (size != notSet) return; - // But short circuit buffer size is normally not set. Put in place the hbase wanted size. - int hbaseSize = conf.getInt("hbase." + dfsKey, defaultSize); - conf.setIfUnset(dfsKey, Integer.toString(hbaseSize)); - } - /** * @param c * @return The DFSClient DFSHedgedReadMetrics instance or null if can't be found or not on hdfs. 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/BaseTestChecksum.java similarity index 77% rename from hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java rename to hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/BaseTestChecksum.java index dd8ebb3567..443907fdd0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/BaseTestChecksum.java @@ -20,86 +20,58 @@ package org.apache.hadoop.hbase.io.hfile; import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.GZ; import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.NONE; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.nio.BufferUnderflowException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; + import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.nio.ByteBuff; -import org.apache.hadoop.hbase.testclassification.IOTests; -import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.ChecksumType; import org.junit.Before; -import org.junit.ClassRule; import org.junit.Test; -import org.junit.experimental.categories.Category; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@Category({IOTests.class, SmallTests.class}) -public class TestChecksum { +public abstract class BaseTestChecksum { + private static final Logger LOG = LoggerFactory.getLogger(BaseTestChecksum.class); - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestChecksum.class); + static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = { NONE, GZ }; + static final int[] BYTES_PER_CHECKSUM = { 50, 500, 688, 16*1024, (16*1024+980), 64 * 1024 }; - private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlock.class); - - static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = { - NONE, GZ }; - - static final int[] BYTES_PER_CHECKSUM = { - 50, 500, 688, 16*1024, (16*1024+980), 64 * 1024}; - - private static final HBaseTestingUtility TEST_UTIL = - new HBaseTestingUtility(); - private FileSystem fs; - private HFileSystem hfs; + protected static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + protected FileSystem fs; + protected HFileSystem hfs; @Before public void setUp() throws Exception { fs = HFileSystem.get(TEST_UTIL.getConfiguration()); - hfs = (HFileSystem)fs; + hfs = (HFileSystem) fs; } @Test public void testNewBlocksHaveDefaultChecksum() throws IOException { Path path = new Path(TEST_UTIL.getDataTestDir(), "default_checksum"); - FSDataOutputStream os = fs.create(path); - HFileContext meta = new 
HFileContextBuilder().build(); - HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta); - DataOutputStream dos = hbw.startWriting(BlockType.DATA); - for (int i = 0; i < 1000; ++i) - dos.writeInt(i); - hbw.writeHeaderAndData(os); - int totalSize = hbw.getOnDiskSizeWithHeader(); - os.close(); + int totalSize = writeFile(fs, path, ChecksumType.getDefaultChecksumType()); - // Use hbase checksums. - assertEquals(true, hfs.useHBaseChecksum()); + assertTrue("Should use HBase Checksums", hfs.useHBaseChecksum()); - FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path); - meta = new HFileContextBuilder().withHBaseCheckSum(true).build(); - HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl( - is, totalSize, (HFileSystem) fs, path, meta); - HFileBlock b = hbr.readBlockData(0, -1, false, false); - assertEquals(b.getChecksumType(), ChecksumType.getDefaultChecksumType().getCode()); + HFileBlock b = readBlockData(fs, path, totalSize); + assertEquals("HBase checksum is not correct type", b.getChecksumType(), + ChecksumType.getDefaultChecksumType().getCode()); } /** @@ -107,46 +79,45 @@ public class TestChecksum { */ @Test public void testAllChecksumTypes() throws IOException { - List cktypes = new ArrayList<>(Arrays.asList(ChecksumType.values())); - for (Iterator itr = cktypes.iterator(); itr.hasNext(); ) { - ChecksumType cktype = itr.next(); + for (ChecksumType cktype : ChecksumType.values()) { Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + cktype.getName()); - FSDataOutputStream os = fs.create(path); - HFileContext meta = new HFileContextBuilder() - .withChecksumType(cktype) - .build(); - HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta); - DataOutputStream dos = hbw.startWriting(BlockType.DATA); - for (int i = 0; i < 1000; ++i) { - dos.writeInt(i); - } - hbw.writeHeaderAndData(os); - int totalSize = hbw.getOnDiskSizeWithHeader(); - os.close(); + int totalSize = writeFile(fs, path, cktype); - // Use hbase checksums. 
- assertEquals(true, hfs.useHBaseChecksum()); + assertTrue("Should use HBase Checksums", hfs.useHBaseChecksum()); - FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path); - meta = new HFileContextBuilder().withHBaseCheckSum(true).build(); - HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl( - is, totalSize, (HFileSystem) fs, path, meta); - HFileBlock b = hbr.readBlockData(0, -1, false, false); - ByteBuff data = b.getBufferWithoutHeader(); + ByteBuff data = readBlockData(fs, path, totalSize).getBufferWithoutHeader(); for (int i = 0; i < 1000; i++) { assertEquals(i, data.getInt()); } - boolean exception_thrown = false; try { data.getInt(); + fail("No exception thrown for " + cktype); } catch (BufferUnderflowException e) { - exception_thrown = true; } - assertTrue(exception_thrown); assertEquals(0, HFile.getAndResetChecksumFailuresCount()); } } + protected int writeFile(FileSystem fs, Path path, ChecksumType cktype) throws IOException { + try (FSDataOutputStream os = fs.create(path)) { + HFileContext meta = new HFileContextBuilder().withChecksumType(cktype).build(); + HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta); + DataOutputStream dos = hbw.startWriting(BlockType.DATA); + for (int i = 0; i < 1000; ++i) { + dos.writeInt(i); + } + hbw.writeHeaderAndData(os); + return hbw.getOnDiskSizeWithHeader(); + } + } + + private HFileBlock readBlockData(FileSystem fs, Path path, int size) throws IOException { + FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path); + HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true).build(); + HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, size, (HFileSystem) fs, path, meta); + return hbr.readBlockData(0, -1, false, false); + } + /** * Introduce checksum failures and check that we can still read * the data @@ -160,10 +131,9 @@ public class TestChecksum { protected void testChecksumCorruptionInternals(boolean useTags) throws IOException { for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { for (boolean pread : new boolean[] { false, true }) { - LOG.info("testChecksumCorruption: Compression algorithm: " + algo + - ", pread=" + pread); - Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" - + algo); + LOG.info("testChecksumCorruption: Compression algorithm={} pread={}", algo, pread); + final Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" + algo); + LOG.info("path={}", path); FSDataOutputStream os = fs.create(path); HFileContext meta = new HFileContextBuilder() .withCompression(algo) @@ -182,8 +152,7 @@ public class TestChecksum { } os.close(); - // Use hbase checksums. - assertEquals(true, hfs.useHBaseChecksum()); + assertTrue("Should use HBase Checksums", hfs.useHBaseChecksum()); // Do a read that purposely introduces checksum verification failures. FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path); @@ -213,19 +182,18 @@ public class TestChecksum { // A single instance of hbase checksum failure causes the reader to // switch off hbase checksum verification for the next 100 read // requests. Verify that this is correct. 
- for (int i = 0; i < - HFileBlock.CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD + 1; i++) { - b = hbr.readBlockData(0, -1, pread, false); + for (int i = 0; i < HFileBlock.CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD + 1; i++) { + hbr.readBlockData(0, -1, pread, false); assertEquals(0, HFile.getAndResetChecksumFailuresCount()); } // The next read should have hbase checksum verification reanabled, // we verify this by assertng that there was a hbase-checksum failure. - b = hbr.readBlockData(0, -1, pread, false); + hbr.readBlockData(0, -1, pread, false); assertEquals(1, HFile.getAndResetChecksumFailuresCount()); // Since the above encountered a checksum failure, we switch // back to not checking hbase checksums. - b = hbr.readBlockData(0, -1, pread, false); + hbr.readBlockData(0, -1, pread, false); assertEquals(0, HFile.getAndResetChecksumFailuresCount()); is.close(); @@ -233,7 +201,7 @@ public class TestChecksum { // the configuration. In this case, we should not detect // any retries within hbase. HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false); - assertEquals(false, newfs.useHBaseChecksum()); + assertFalse("HBase Checksums should be disabled", newfs.useHBaseChecksum()); is = new FSDataInputStreamWrapper(newfs, path); hbr = new CorruptedFSReaderImpl(is, totalSize, newfs, path, meta); b = hbr.readBlockData(0, -1, pread, false); @@ -299,14 +267,11 @@ public class TestChecksum { long expectedChunks = ChecksumUtil.numChunks( dataSize + HConstants.HFILEBLOCK_HEADER_SIZE, bytesPerChecksum); - LOG.info("testChecksumChunks: pread=" + pread + - ", bytesPerChecksum=" + bytesPerChecksum + - ", fileSize=" + totalSize + - ", dataSize=" + dataSize + - ", expectedChunks=" + expectedChunks); + LOG.info("testChecksumChunks: " + + "pread={}, bytesPerChecksum={}, fileSize={}, dataSize={}, expectedChunks={}", + pread, bytesPerChecksum, totalSize, dataSize, expectedChunks); - // Verify hbase checksums. - assertEquals(true, hfs.useHBaseChecksum()); + assertTrue("Should use HBase Checksums", hfs.useHBaseChecksum()); // Read data back from file. FSDataInputStream is = fs.open(path); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksumLocalFS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksumLocalFS.java new file mode 100644 index 0000000000..be56a072b2 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksumLocalFS.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; + +import org.apache.hadoop.fs.ChecksumFileSystem; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.testclassification.IOTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.ChecksumType; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({IOTests.class, SmallTests.class}) +public class TestChecksumLocalFS extends BaseTestChecksum { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestChecksumLocalFS.class); + + @Test + public void testNoLocalChecksums() throws IOException { + Path path = new Path(TEST_UTIL.getDataTestDir(), "local_checksum"); + writeFile(fs, path, ChecksumType.getDefaultChecksumType()); + + FileSystem wrappedFs = hfs.getRawFileSystem(); + assertTrue("We should be on LocalFS", wrappedFs instanceof LocalFileSystem); + ChecksumFileSystem checksumFileSystem = (ChecksumFileSystem) wrappedFs; + Path checkFile = checksumFileSystem.getChecksumFile(path); + assertFalse("Should not be using FS checksums on local FS", + checksumFileSystem.getRawFileSystem().exists(checkFile)); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksumWithHdfs.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksumWithHdfs.java new file mode 100644 index 0000000000..3776dadfe0 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksumWithHdfs.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import static org.junit.Assert.assertEquals; + +import java.lang.reflect.Field; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.testclassification.IOTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({IOTests.class, MediumTests.class}) +public class TestChecksumWithHdfs extends BaseTestChecksum { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestChecksumWithHdfs.class); + + @BeforeClass + public static void startCluster() throws Exception { + TEST_UTIL.startMiniCluster(); + } + + @Test + public void testNotDoubleChecksumming() throws Exception { + HFileSystem fs = (HFileSystem) TEST_UTIL.getTestFileSystem(); + FileSystem dfs = fs.getNoChecksumFs(); + + Field verifyChecksum = DistributedFileSystem.class.getDeclaredField("verifyChecksum"); + verifyChecksum.setAccessible(true); + assertEquals(Boolean.FALSE, verifyChecksum.get(dfs)); + } + + @AfterClass + public static void teardown() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } +} diff --git a/src/main/asciidoc/_chapters/performance.adoc b/src/main/asciidoc/_chapters/performance.adoc index 866779ca78..ebeed57f9c 100644 --- a/src/main/asciidoc/_chapters/performance.adoc +++ b/src/main/asciidoc/_chapters/performance.adoc @@ -841,17 +841,26 @@ See the link:https://issues.apache.org/jira/browse/HDFS-1599[Umbrella Jira Ticke [[perf.hdfs.configs.localread]] === Leveraging local data -Since Hadoop 1.0.0 (also 0.22.1, 0.23.1, CDH3u3 and HDP 1.0) via link:https://issues.apache.org/jira/browse/HDFS-2246[HDFS-2246], it is possible for the DFSClient to take a "short circuit" and read directly from the disk instead of going through the DataNode when the data is local. +It is possible for the DFSClient that HBase uses to take a "short circuit" and read directly from +the disk instead of going through the DataNode when the data is local. What this means for HBase is that the RegionServers can read directly off their machine's disks instead of having to open a socket to talk to the DataNode, the former being generally much faster. See JD's link:http://files.meetup.com/1350427/hug_ebay_jdcryans.pdf[Performance Talk]. Also see link:http://search-hadoop.com/m/zV6dKrLCVh1[HBase, mail # dev - read short circuit] thread for more discussion around short circuit reads. +The exact numbers presented there may be out of date, and a few of the configuration properties +have moved around, but the general concepts still apply. To enable "short circuit" reads, it will depend on your version of Hadoop. The original shortcircuit read patch was much improved upon in Hadoop 2 in link:https://issues.apache.org/jira/browse/HDFS-347[HDFS-347]. -See http://blog.cloudera.com/blog/2013/08/how-improved-short-circuit-local-reads-bring-better-performance-and-security-to-hadoop/ for details on the difference between the old and new implementations. -See link:http://archive.cloudera.com/cdh4/cdh/4/hadoop/hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html[Hadoop shortcircuit reads configuration page] for how to enable the latter, better version of shortcircuit. 
-For example, here is a minimal config. -enabling short-circuit reads added to _hbase-site.xml_: +See HDFS documentation at link:https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs +/ShortCircuitLocalReads.html[Short-Circuit Local Reads] for full detail on how to configure +and enable the SCR feature. + +Both the RegionServer and the DataNode will need to have the hadoop native `.so` library loaded +for SCR functionality to be available. Both ends of the connection, that is both the DFS +DataNode and HBase RegionServer, will need to be configured with matching properties to allow +communication to take place. + +For example, here is a minimal configuration snippet: [source,xml] ---- @@ -874,23 +883,47 @@ enabling short-circuit reads added to _hbase-site.xml_: ---- -Be careful about permissions for the directory that hosts the shared domain socket; dfsclient will complain if open to other than the hbase user. - -If you are running on an old Hadoop, one that is without link:https://issues.apache.org/jira/browse/HDFS-347[HDFS-347] but that has link:https://issues.apache.org/jira/browse/HDFS-2246[HDFS-2246], you must set two configurations. -First, the hdfs-site.xml needs to be amended. -Set the property `dfs.block.local-path-access.user` to be the _only_ user that can use the shortcut. -This has to be the user that started HBase. -Then in hbase-site.xml, set `dfs.client.read.shortcircuit` to be `true` - -Services -- at least the HBase RegionServers -- will need to be restarted in order to pick up the new configurations. +Other properties should be set for you by HBase internals, but in rare cases you may continue to +encounter issues. In those scenarios, you may need to perform advanced tuning. .dfs.client.read.shortcircuit.buffer.size [NOTE] ==== -The default for this value is too high when running on a highly trafficked HBase. -In HBase, if this value has not been set, we set it down from the default of 1M to 128k (Since HBase 0.98.0 and 0.96.1). See link:https://issues.apache.org/jira/browse/HBASE-8143[HBASE-8143 HBase on Hadoop 2 with local short circuit reads (ssr) causes OOM]). The Hadoop DFSClient in HBase will allocate a direct byte buffer of this size for _each_ block it has open; given HBase keeps its HDFS files open all the time, this can add up quickly. +The HDFS default for this value is too high when running HBase under heavy load. + +In HBase, if this value has not been configured, we lower it from the default of 1M to 128k. +See link:https://issues.apache.org/jira/browse/HBASE-8143[HBASE-8143] for discussion of "HBase on +Hadoop 2 with local short circuit reads (ssr) causes OOM". + +The Hadoop DFSClient in HBase will allocate a direct byte buffer of this size for _each_ block it +has open; given HBase keeps its HDFS files open all the time, this can add up quickly. ==== +Other configurations you may consider: + +* In `hbase-site.xml`, increase `hbase.hstore.min.locality.to.skip.major.compact` from the +default value of `0.0` (up to a max of `1.0`) to encourage more data locality during compactions. +A value of `0.7` has been experimentally shown to perform well, but likely needs additional +refinement based on specific workload. +* Make sure DataNodes have enough handlers for block transfers. 
In `hdfs-site.xml`, consider the +following parameters: +- `dfs.datanode.max.xcievers >= 8192` +- `dfs.datanode.handler.count =` number of spindles +* At least one user reported improvements after tuning `dfs.client.read.shortcircuit.streams.cache +.size` and `dfs.client.socketcache.capacity`. Documentation is sparse on these options, you may +end up reading source code if you want to adjust these. + +Be careful about permissions for the directory that hosts the shared domain socket; dfsclient will complain if open to other than the hbase user. + +HBase RegionServers will need to be restarted in order to pick up the new configurations. +Check the RegionServer logs after restart. You should only see complaint if misconfiguration. +Otherwise, shortcircuit read operates quietly in background. It does not provide metrics so +no optics on how effective it is but read latencies should show a marked improvement, especially if +good data locality, lots of random reads, and dataset is larger than available cache. + +For more on short-circuit reads, see Colin's old blog on rollout, +link:http://blog.cloudera.com/blog/2013/08/how-improved-short-circuit-local-reads-bring-better-performance-and-security-to-hadoop/[How Improved Short-Circuit Local Reads Bring Better Performance and Security to Hadoop]. + [[perf.hdfs.comp]] === Performance Comparisons of HBase vs. HDFS diff --git a/src/main/asciidoc/_chapters/schema_design.adoc b/src/main/asciidoc/_chapters/schema_design.adoc index fdbd18468c..4afac108bf 100644 --- a/src/main/asciidoc/_chapters/schema_design.adoc +++ b/src/main/asciidoc/_chapters/schema_design.adoc @@ -1148,38 +1148,8 @@ Detect regionserver failure as fast as reasonable. Set the following parameters: [[shortcircuit.reads]] === Optimize on the Server Side for Low Latency -Skip the network for local blocks when the RegionServer goes to read from HDFS by exploiting HDFS's -link:https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html[Short-Circuit Local Reads] facility. -Note how setup must be done both at the datanode and on the dfsclient ends of the conneciton -- i.e. at the RegionServer -and how both ends need to have loaded the hadoop native `.so` library. -After configuring your hadoop setting _dfs.client.read.shortcircuit_ to _true_ and configuring -the _dfs.domain.socket.path_ path for the datanode and dfsclient to share and restarting, next configure -the regionserver/dfsclient side. - -* In `hbase-site.xml`, set the following parameters: -- `dfs.client.read.shortcircuit = true` -- `dfs.client.read.shortcircuit.skip.checksum = true` so we don't double checksum (HBase does its own checksumming to save on i/os. See <> for more on this. -- `dfs.domain.socket.path` to match what was set for the datanodes. -- `dfs.client.read.shortcircuit.buffer.size = 131072` Important to avoid OOME -- hbase has a default it uses if unset, see `hbase.dfs.client.read.shortcircuit.buffer.size`; its default is 131072. -* Ensure data locality. In `hbase-site.xml`, set `hbase.hstore.min.locality.to.skip.major.compact = 0.7` (Meaning that 0.7 \<= n \<= 1) -* Make sure DataNodes have enough handlers for block transfers. In `hdfs-site.xml`, set the following parameters: -- `dfs.datanode.max.xcievers >= 8192` -- `dfs.datanode.handler.count =` number of spindles - -Check the RegionServer logs after restart. You should only see complaint if misconfiguration. -Otherwise, shortcircuit read operates quietly in background. 
It does not provide metrics so -no optics on how effective it is but read latencies should show a marked improvement, especially if -good data locality, lots of random reads, and dataset is larger than available cache. - -Other advanced configurations that you might play with, especially if shortcircuit functionality -is complaining in the logs, include `dfs.client.read.shortcircuit.streams.cache.size` and -`dfs.client.socketcache.capacity`. Documentation is sparse on these options. You'll have to -read source code. - -For more on short-circuit reads, see Colin's old blog on rollout, -link:http://blog.cloudera.com/blog/2013/08/how-improved-short-circuit-local-reads-bring-better-performance-and-security-to-hadoop/[How Improved Short-Circuit Local Reads Bring Better Performance and Security to Hadoop]. -The link:https://issues.apache.org/jira/browse/HDFS-347[HDFS-347] issue also makes for an -interesting read showing the HDFS community at its best (caveat a few comments). +See the performance section for more details about <<perf.hdfs.configs.localread>>. === JVM Tuning -- 2.16.1
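
For reference, the "minimal configuration snippet" that the updated performance.adoc text points at is elided by the diff context above. The sketch below is an illustration only, not part of the patch: it shows the two properties the surrounding prose describes (`dfs.client.read.shortcircuit` and `dfs.domain.socket.path`), which must be set consistently on both the DataNode and the RegionServer/dfsclient side. The socket path shown is an example value and must match whatever path the DataNodes are configured to use.

[source,xml]
----
<!-- Illustrative short-circuit read settings; add matching values on both ends
     of the connection (DataNode hdfs-site.xml and RegionServer hbase-site.xml). -->
<property>
  <name>dfs.client.read.shortcircuit</name>
  <value>true</value>
</property>
<property>
  <!-- Example path only; must match dfs.domain.socket.path on the DataNodes. -->
  <name>dfs.domain.socket.path</name>
  <value>/var/lib/hadoop-hdfs/dn_socket</value>
</property>
----

With HBase checksums left enabled (the default), `dfs.client.read.shortcircuit.skip.checksum` does not need to be set by hand; as the HFileSystem change in this patch shows, HBase sets that flag itself on the internal no-checksum FileSystem instance it uses for hot-path reads.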