From f208f5321dcd973db1581dda94ff56eb766c3fdd Mon Sep 17 00:00:00 2001 From: stack Date: Tue, 30 Jul 2019 17:07:48 -0700 Subject: [PATCH] HBASE-22771 [HBCK2] fixMeta method and server-side support This is a first cut at this patch. Implements hold fixing only currently. Add a fixMeta method to Hbck Interface. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java Bug fix. If hole is on end of last table, I wasn't seeing it. A hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java Add a general meta fixer class. Explains up top why this stuff doesn't belong inside MetaTableAccessor. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Break out the filesystem messing so don't have to copy it nor do more than is needed doing fixup for Region holes. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java Change behavious slightly. If directory exists, don't fail as we did but try and keep going and create .regioninfo file if missing (or overwrite if in place). This should make it idempotent. Can rerun command. Lets see if any repercussions in test suite. A hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java Add test. --- .../apache/hadoop/hbase/client/HBaseHbck.java | 10 ++ .../org/apache/hadoop/hbase/client/Hbck.java | 5 + .../src/main/protobuf/Master.proto | 10 ++ .../hadoop/hbase/master/CatalogJanitor.java | 10 ++ .../hadoop/hbase/master/HbckChecker.java | 6 +- .../hbase/master/MasterRpcServices.java | 14 ++ .../apache/hadoop/hbase/master/MetaFixer.java | 137 ++++++++++++++++++ .../hadoop/hbase/regionserver/HRegion.java | 21 ++- .../hbase/regionserver/HRegionFileSystem.java | 15 +- .../hadoop/hbase/master/TestMetaFixer.java | 94 ++++++++++++ 10 files changed, 309 insertions(+), 13 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java index 996cd26509..4a91a9f198 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java @@ -171,4 +171,14 @@ public class HBaseHbck implements Hbck { throw new IOException(se); } } + + @Override + public void fixMeta() throws IOException { + try { + this.hbck.fixMeta(rpcControllerFactory.newController(), + MasterProtos.FixMetaRequest.newBuilder().build()); + } catch (ServiceException se) { + throw new IOException(se); + } + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java index 1952e36365..cc12f2cbf3 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java @@ -121,4 +121,9 @@ public interface Hbck extends Abortable, Closeable { } List scheduleServerCrashProcedures(List serverNames) throws IOException; + + /** + * Fix Meta. + */ + void fixMeta() throws IOException; } diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index 35ae5ea0a2..314c8a366e 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -1108,6 +1108,12 @@ message ScheduleServerCrashProcedureResponse { repeated uint64 pid = 1; } +message FixMetaRequest { +} + +message FixMetaResponse { +} + service HbckService { /** Update state of the table in meta only*/ rpc SetTableStateInMeta(SetTableStateInMetaRequest) @@ -1138,4 +1144,8 @@ service HbckService { /** Schedule a ServerCrashProcedure to help recover a crash server */ rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest) returns(ScheduleServerCrashProcedureResponse); + + /** Schedule a fix meta run. */ + rpc FixMeta(FixMetaRequest) + returns(FixMetaResponse); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index e6564a54e6..d2eb36589b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -561,6 +561,11 @@ public class CatalogJanitor extends ScheduledChore { public byte[] getMetaRow() { return metaRow; } + + @Override + public String toString() { + return "metaRow=" + Bytes.toStringBinary(getMetaRow()) + ", regionInfo=" + getRegionInfo(); + } } /** @@ -743,6 +748,11 @@ public class CatalogJanitor extends ScheduledChore { @Override public void close() throws IOException { + // This is a table transition... after the last region. Check previous. + // Should be last region. If not, its a hole on end of laster table. + if (this.previous != null && !this.previous.getRegionInfo().isLast()) { + addHole(this.previous, MetaRow.UNDEFINED); + } this.closed = true; } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java index fbc2c70727..95d211465b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java @@ -43,10 +43,12 @@ import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.collect.Lists; /** - * Used to do the hbck checking job at master side. + * Used to do the hbck checking job at master side. Runs as a {@link ScheduledChore}. + * Inherits and adapts facility from HBCK1. Looks at HDFS and remote RegionServers. + * Does not peruse hbase:meta. That is done by the CatalogJanitor when it runs. + * @see CatalogJanitor */ @InterfaceAudience.Private -@InterfaceStability.Evolving public class HbckChecker extends ScheduledChore { private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index a7df7d4a6e..54c100db66 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -189,6 +189,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableTabl import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableTableResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ExecProcedureRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ExecProcedureResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.FixMetaRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.FixMetaResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterStatusRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterStatusResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest; @@ -2533,6 +2535,18 @@ public class MasterRpcServices extends RSRpcServices } } + @Override + public MasterProtos.FixMetaResponse fixMeta(RpcController controller, FixMetaRequest request) + throws ServiceException { + try { + MetaFixer mf = new MetaFixer(this.master); + mf.fix(); + return MasterProtos.FixMetaResponse.newBuilder().build(); + } catch (IOException ioe) { + throw new ServiceException(ioe); + } + } + @Override public SwitchRpcThrottleResponse switchRpcThrottle(RpcController controller, SwitchRpcThrottleRequest request) throws ServiceException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java new file mode 100644 index 0000000000..de67299ea9 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +/** + * Server-side fixing of bad or inconsistent state in hbase:meta. + * Distinct from MetaTableAccessor because MTA is about low-level + * manipulations driven by the Master. This class MetaFixer is + * employed by the Master and it 'knows' about holes and orphan + * and encapsulates their fixing on behalf of the Master. + */ +@InterfaceAudience.Private +class MetaFixer { + private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class.getName()); + private final MasterServices masterServices; + + MetaFixer(MasterServices masterServices) { + this.masterServices = masterServices; + } + + void fix() throws IOException { + fixHoles(); + } + + /** + * If hole, it papers it over by adding a region in the filesystem and to hbase:meta. + * Does not assign. + * @return True if we fixed any 'holes'. + */ + boolean fixHoles() throws IOException { + boolean result = false; + CatalogJanitor.Report r = this.masterServices.getCatalogJanitor().getLastReport(); + if (r == null) { + LOG.debug("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " + + "shell or wait until CatalogJanitor chore runs."); + return result; + } + List> holes = r.getHoles(); + if (r.getHoles().isEmpty()) { + LOG.debug("No holes."); + return result; + } + for (Pair p: holes) { + RegionInfo ri = getHoleCover(p); + if (ri == null) { + continue; + } + Configuration configuration = this.masterServices.getConfiguration(); + HRegion.createRegionDir(this.masterServices.getConfiguration(), ri); + // If an error here, then we'll have a region in the filesystem but not + // in hbase:meta (if the below fails). Should be able to rerun the fix. + // The second call to createRegionDir will just go through. Idempotent. + Put put = MetaTableAccessor.makePutFromRegionInfo(ri, HConstants.LATEST_TIMESTAMP); + MetaTableAccessor.putsToMetaTable(this.masterServices.getConnection(), Arrays.asList(put)); + LOG.info("Created UNASSIGNED {} and inserted into hbase:meta", ri); + result = true; + } + return result; + } + + /** + * @return Calculated RegionInfo that covers the hole hole + */ + private RegionInfo getHoleCover(Pair hole) { + RegionInfo holeCover = null; + RegionInfo left = hole.getFirst().getRegionInfo(); + RegionInfo right = hole.getSecond().getRegionInfo(); + if (left.getTable().equals(right.getTable())) { + // Simple case. + if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) { + LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey; " + + "left=<{}>, right=<{}>", left, right); + return holeCover; + } + holeCover = RegionInfoBuilder.newBuilder(left).setStartKey(left.getEndKey()). + setEndKey(right.getStartKey()).build(); + } else { + boolean leftUndefined = left.equals(RegionInfo.UNDEFINED); + boolean rightUnefined = right.equals(RegionInfo.UNDEFINED); + boolean last = left.isLast(); + boolean first = right.isFirst(); + if (leftUndefined && rightUnefined) { + LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " + + "UNDEFINED; left=<{}>, right=<{}>", left, right); + return holeCover; + } + if (leftUndefined || last) { + holeCover = RegionInfoBuilder.newBuilder(right).setStartKey(HConstants.EMPTY_START_ROW). + setEndKey(right.getStartKey()).build(); + } else if (right.equals(RegionInfo.UNDEFINED) || first) { + holeCover = RegionInfoBuilder.newBuilder(left).setStartKey(left.getEndKey()). + setEndKey(HConstants.EMPTY_END_ROW).build(); + } else { + LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", + left, right); + return holeCover; + } + } + return holeCover; + } + + void fixOphans() { + // TODO + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 33cdda602a..356147686d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -7125,12 +7125,12 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi final Configuration conf, final TableDescriptor hTableDescriptor, final WAL wal, final boolean initialize) throws IOException { - LOG.info("creating HRegion " + info.getTable().getNameAsString() - + " HTD == " + hTableDescriptor + " RootDir = " + rootDir + - " Table name == " + info.getTable().getNameAsString()); + LOG.info("creating " + info + + ", tableDescriptor=" + (hTableDescriptor == null? "null": hTableDescriptor) + + ", regionDir=" + rootDir); + createRegionDir(conf, info); FileSystem fs = rootDir.getFileSystem(conf); Path tableDir = FSUtils.getTableDir(rootDir, info.getTable()); - HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, info); HRegion region = HRegion.newHRegion(tableDir, wal, fs, conf, info, hTableDescriptor, null); if (initialize) { region.initialize(null); @@ -7138,6 +7138,19 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi return region; } + /** + * Create the region directory in the filesystem. + */ + public static HRegionFileSystem createRegionDir(Configuration configuration, RegionInfo ri) + throws IOException { + Path rootDir = FSUtils.getRootDir(configuration); + FileSystem fs = rootDir.getFileSystem(configuration); + Path tableDir = FSUtils.getTableDir(rootDir, ri.getTable()); + // If directory already exists, will log warning and keep going. Will try to create + // .regioninfo. If one exists, will overwrite. + return HRegionFileSystem.createRegionOnFileSystem(configuration, fs, tableDir, ri); + } + public static HRegion createHRegion(final RegionInfo info, final Path rootDir, final Configuration conf, final TableDescriptor hTableDescriptor, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java index 24026e62e8..45a2627e07 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.UUID; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -867,6 +868,7 @@ public class HRegionFileSystem { /** * Write the .regioninfo file on-disk. + * Overwrites if exists already. */ private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs, final Path regionInfoFile, final byte[] content) throws IOException { @@ -989,13 +991,12 @@ public class HRegionFileSystem { Path regionDir = regionFs.getRegionDir(); if (fs.exists(regionDir)) { LOG.warn("Trying to create a region that already exists on disk: " + regionDir); - throw new IOException("The specified region already exists on disk: " + regionDir); - } - - // Create the region directory - if (!createDirOnFileSystem(fs, conf, regionDir)) { - LOG.warn("Unable to create the region directory: " + regionDir); - throw new IOException("Unable to create region directory: " + regionDir); + } else { + // Create the region directory + if (!createDirOnFileSystem(fs, conf, regionDir)) { + LOG.warn("Unable to create the region directory: " + regionDir); + throw new IOException("Unable to create region directory: " + regionDir); + } } // Write HRI to a file in case we need to recover hbase:meta diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java new file mode 100644 index 0000000000..3611980af1 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +@Category({MasterTests.class, LargeTests.class}) +public class TestMetaFixer { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMetaFixer.class); + @Rule + public TestName name = new TestName(); + private static final Logger LOG = LoggerFactory.getLogger(TestMetaFixer.class); + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @BeforeClass + public static void setupBeforeClass() throws Exception { + TEST_UTIL.startMiniCluster(); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void before() throws IOException { + TEST_UTIL.createMultiRegionTable(TableName.valueOf(this.name.getMethodName()), + HConstants.CATALOG_FAMILY); + } + + @Test + public void testPlugsHole() throws IOException { + // Remove first, last and a middle region. See if hole gets plugged. Table has a bunch + // of regions in it. + TableName tn = TableName.valueOf(this.name.getMethodName()); + List ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); + int originalCount = ris.size(); + MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ris.get(ris.size() - 1)); + MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ris.get(3)); + MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ris.get(0)); + MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); + services.getCatalogJanitor().scan(); + CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); + LOG.info(report.toString()); + assertEquals(3, report.getHoles().size()); + MetaFixer fixer = new MetaFixer(services); + assertTrue(fixer.fixHoles()); + TEST_UTIL.getAdmin().disableTable(tn); + TEST_UTIL.getAdmin().enableTable(tn); + ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); + assertEquals(originalCount, ris.size()); + } +} -- 2.19.1