diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java index 7915ac3cef..6a7feec8cb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java @@ -22,6 +22,8 @@ import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.c import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkNotNull; import com.lmax.disruptor.RingBuffer; + +import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InterruptedIOException; @@ -385,7 +387,17 @@ public abstract class AbstractFSWAL implements WAL { if (failIfWALExists) { final FileStatus[] walFiles = CommonFSUtils.listStatus(fs, walDir, ourFiles); if (null != walFiles && 0 != walFiles.length) { - throw new IOException("Target WAL already exists within directory " + walDir); + // See HBASE-21751 + // There is a case that the first attempt of creating WAL failed + // during region open, leaving a wal with zero size(In some special cases, e.g. disk full, HDFS + // successfully creates the file but can't allocate blocks for the file, it will result in a + // empty file) will cause the wal can't be created again. Thus the region can't open on this + // RS forever. + for (FileStatus fileStatus : walFiles) { + if (fileStatus.getLen() > 0) { + throw new IOException("Target WAL already exists within directory " + walDir); + } + } } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestGetWALFailedWithEmptyWALCreated.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestGetWALFailedWithEmptyWALCreated.java new file mode 100644 index 0000000000..2ef0a06172 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestGetWALFailedWithEmptyWALCreated.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.HConstants.HREGION_LOGDIR_NAME; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.wal.WAL; +import org.apache.hadoop.hbase.wal.WALFactory; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test case for HBASE-21751 + */ +@Category({ RegionServerTests.class, MediumTests.class}) +public class TestGetWALFailedWithEmptyWALCreated { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestGetWALFailedWithEmptyWALCreated.class); + + private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + public static Configuration CONF ; + + + + @BeforeClass + public static void setUp() throws Exception { + //TEST_UTIL = HBaseTestingUtility.createLocalHTU(); + CONF = TEST_UTIL.getConfiguration(); + } + + @AfterClass + public static void shutdown() throws Exception { + TEST_UTIL.cleanupTestDir(); + } + + @Test + public void test() throws Exception { + String fakeServerName = "testservernameWALDir"; + RegionInfo hri = RegionInfoBuilder + .newBuilder(TableName.valueOf("TestGetWALFailedWithEmptyWALCreated")).build(); + Path walDir = new Path(TEST_UTIL.getDataTestDir(), HREGION_LOGDIR_NAME); + Path rsWalDir = new Path(walDir, fakeServerName); + //Inject a empty wal, simulate the situation where the first attempt of creating WAL failed + // during region open, leaving a wal with zero size(In some special cases, e.g. disk full, HDFS + // successfully creates the file but can't allocate blocks for the file, it will result in a + // empty file) will cause the wal can't be created again. Thus the region can't open on this + // RS forever. + Path emptyWalPath = new Path(rsWalDir, fakeServerName + "." + System.currentTimeMillis()); + TEST_UTIL.getTestFileSystem().create(emptyWalPath).close(); + Configuration confForWAL = new Configuration(CONF); + confForWAL.set(HConstants.HBASE_DIR, TEST_UTIL.getDataTestDir().toString()); + //WAl should be able to create even if there is a empty wal inside the dir + WAL wal = new WALFactory(confForWAL, fakeServerName).getWAL(hri); + } +}