From a210cfb442a722840d519d8c4f1039896a967baf Mon Sep 17 00:00:00 2001 From: stack Date: Fri, 6 Sep 2019 14:56:05 +0200 Subject: [PATCH] HBASE-22796 [HBCK2] Add fix of overlaps to fixMeta hbck Service --- .../hadoop/hbase/master/CatalogJanitor.java | 27 ++-- .../apache/hadoop/hbase/master/MetaFixer.java | 110 +++++++++++---- .../master/assignment/RegionStateNode.java | 2 +- .../hadoop/hbase/master/TestMetaFixer.java | 53 +++++-- .../hbase/master/TestMetaFixerNoCluster.java | 130 ++++++++++++++++++ 5 files changed, 277 insertions(+), 45 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixerNoCluster.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index 81e582576f..0889766af1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -457,6 +457,12 @@ public class CatalogJanitor extends ScheduledChore { return this.holes; } + /** + * @return Overlap pairs found as we scanned hbase:meta; ordered by hbase:meta + * table sort. Pairs of overlaps may have overlap with subsequent pairs. + * See {@link MetaFixer#calculateMerges(List)} where we aggregate overlaps + * for a single 'merge' call. + */ public List> getOverlaps() { return this.overlaps; } @@ -479,20 +485,20 @@ public class CatalogJanitor extends ScheduledChore { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (Pair p: this.holes) { if (sb.length() > 0) { sb.append(", "); } - sb.append("hole=" + p.getFirst().getRegionNameAsString() + "/" + - p.getSecond().getRegionNameAsString()); + sb.append("hole=").append(p.getFirst().getRegionNameAsString()).append("/"). + append(p.getSecond().getRegionNameAsString()); } for (Pair p: this.overlaps) { if (sb.length() > 0) { sb.append(", "); } - sb.append("overlap=" + p.getFirst().getRegionNameAsString() + "/" + - p.getSecond().getRegionNameAsString()); + sb.append("overlap=").append(p.getFirst().getRegionNameAsString()).append("/"). + append(p.getSecond().getRegionNameAsString()); } for (byte [] r: this.emptyRegionInfo) { if (sb.length() > 0) { @@ -587,7 +593,7 @@ public class CatalogJanitor extends ScheduledChore { MetaTableAccessor.getRegionInfoColumn()); } else { ri = locations.getDefaultRegionLocation().getRegion(); - checkServer(metaTableRow.getRow(), locations); + checkServer(locations); } if (ri == null) { @@ -658,7 +664,7 @@ public class CatalogJanitor extends ScheduledChore { /** * Run through referenced servers and save off unknown and the dead. */ - private void checkServer(byte [] metaTableRow, RegionLocations locations) { + private void checkServer(RegionLocations locations) { if (this.services == null) { // Can't do this test if no services. return; @@ -672,6 +678,11 @@ public class CatalogJanitor extends ScheduledChore { if (sn == null) { continue; } + if (location.getRegion() == null) { + LOG.warn("Empty RegionInfo in {}", location); + // This should never happen but if it does, will mess up below. + continue; + } // skip the offline regions which belong to disabled table. if (isTableDisabled(location.getRegion())) { continue; @@ -680,7 +691,7 @@ public class CatalogJanitor extends ScheduledChore { isServerKnownAndOnline(sn); switch (state) { case UNKNOWN: - this.report.unknownServers.add(new Pair(location.getRegion(), sn)); + this.report.unknownServers.add(new Pair<>(location.getRegion(), sn)); break; default: diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java index d94e4bd5cb..f970215425 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java @@ -18,8 +18,12 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; @@ -32,6 +36,7 @@ import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.Pair; +import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,7 +45,7 @@ import org.slf4j.LoggerFactory; * Server-side fixing of bad or inconsistent state in hbase:meta. * Distinct from MetaTableAccessor because {@link MetaTableAccessor} is about low-level * manipulations driven by the Master. This class MetaFixer is - * employed by the Master and it 'knows' about holes and orphan + * employed by the Master and it 'knows' about holes and orphans * and encapsulates their fixing on behalf of the Master. */ @InterfaceAudience.Private @@ -66,14 +71,12 @@ class MetaFixer { /** * If hole, it papers it over by adding a region in the filesystem and to hbase:meta. * Does not assign. - * @return True if we fixed any 'holes'. */ - boolean fixHoles(CatalogJanitor.Report report) throws IOException { - boolean result = false; + void fixHoles(CatalogJanitor.Report report) throws IOException { List> holes = report.getHoles(); if (holes.isEmpty()) { LOG.debug("No holes."); - return result; + return; } for (Pair p: holes) { RegionInfo ri = getHoleCover(p); @@ -86,11 +89,10 @@ class MetaFixer { // in hbase:meta (if the below fails). Should be able to rerun the fix. // The second call to createRegionDir will just go through. Idempotent. Put put = MetaTableAccessor.makePutFromRegionInfo(ri, HConstants.LATEST_TIMESTAMP); - MetaTableAccessor.putsToMetaTable(this.masterServices.getConnection(), Arrays.asList(put)); + MetaTableAccessor.putsToMetaTable(this.masterServices.getConnection(), + Collections.singletonList(put)); LOG.info("Fixed hole by adding {}; region is NOT assigned (assign to online).", ri); - result = true; } - return result; } /** @@ -136,28 +138,82 @@ class MetaFixer { return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build(); } - boolean fixOverlaps(CatalogJanitor.Report report) throws IOException { - boolean result = false; - List> overlaps = report.getOverlaps(); + /** + * Fix overlaps noted in CJ consistency report. + */ + void fixOverlaps(CatalogJanitor.Report report) throws IOException { + for (Set regions: calculateMerges(report.getOverlaps())) { + RegionInfo [] regionsArray = regions.toArray(new RegionInfo [] {}); + this.masterServices.mergeRegions(regionsArray, + false, HConstants.NO_NONCE, HConstants.NO_NONCE); + } + } + + /** + * Run through overlaps and return a list of merges to run. + * Presumes overlaps are ordered (which they are coming out of the CatalogJanitor + * consistency report). + */ + @VisibleForTesting + static List> calculateMerges(List> overlaps) { if (overlaps.isEmpty()) { LOG.debug("No overlaps."); - return result; + return Collections.emptyList(); } - for (Pair p: overlaps) { - RegionInfo ri = getHoleCover(p); - if (ri == null) { - continue; + List> merges = new ArrayList<>(); + SortedSet currentMergeSet = new TreeSet<>(); + RegionInfo regionInfoWithlargestEndKey = null; + for (Pair pair: overlaps) { + if (regionInfoWithlargestEndKey != null && + !isOverlap(regionInfoWithlargestEndKey, pair)) { + merges.add(currentMergeSet); + currentMergeSet = new TreeSet<>(); } - Configuration configuration = this.masterServices.getConfiguration(); - HRegion.createRegionDir(configuration, ri, FSUtils.getRootDir(configuration)); - // If an error here, then we'll have a region in the filesystem but not - // in hbase:meta (if the below fails). Should be able to rerun the fix. - // The second call to createRegionDir will just go through. Idempotent. - Put put = MetaTableAccessor.makePutFromRegionInfo(ri, HConstants.LATEST_TIMESTAMP); - MetaTableAccessor.putsToMetaTable(this.masterServices.getConnection(), Arrays.asList(put)); - LOG.info("Fixed hole by adding {}; region is NOT assigned (assign to online).", ri); - result = true; + currentMergeSet.add(pair.getFirst()); + currentMergeSet.add(pair.getSecond()); + regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey( + getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()), + regionInfoWithlargestEndKey); + } + merges.add(currentMergeSet); + return merges; + } + + /** + * @return Either a or b, whichever has the + * endkey that is furthest along in the Table. + */ + @VisibleForTesting + static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) { + if (a == null) { + // b may be null. + return b; + } + if (b == null) { + // Both are null. The return is not-defined. + return a; + } + assert a.getTable().equals(b.getTable()); + if (a.isLast()) { + return a; + } + if (b.isLast()) { + return b; + } + int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey()); + return compare == 0 || compare > 0? a: b; + } + + /** + * @return True if an overlap found between passed in ri and + * the pair. Does NOT check the pairs themselves overlap. + */ + @VisibleForTesting + static boolean isOverlap(RegionInfo ri, Pair pair) { + if (ri == null || pair == null) { + // Can't be an overlap in either of these cases. + return false; } - return result; + return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond()); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java index 12eb0a0fe4..30e67061f2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java @@ -294,7 +294,7 @@ public class RegionStateNode implements Comparable { RegionInfo ri = getRegionInfo(); State s = state; if (s != State.OPEN) { - throw new DoNotRetryRegionException(ri.getEncodedName() + " is no OPEN; state=" + s); + throw new DoNotRetryRegionException(ri.getEncodedName() + " is not OPEN; state=" + s); } if (ri.isSplitParent()) { throw new DoNotRetryRegionException( diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java index 2bffe31415..0d53319662 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.apache.hadoop.hbase.HBaseClassTestRule; @@ -26,8 +28,10 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Threads; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -36,8 +40,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; @Category({MasterTests.class, LargeTests.class}) @@ -47,7 +49,6 @@ public class TestMetaFixer { HBaseClassTestRule.forClass(TestMetaFixer.class); @Rule public TestName name = new TestName(); - private static final Logger LOG = LoggerFactory.getLogger(TestMetaFixer.class); private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); @@ -81,12 +82,11 @@ public class TestMetaFixer { deleteRegion(services, ris.get(ris.size() -1)); deleteRegion(services, ris.get(3)); deleteRegion(services, ris.get(0)); - ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); services.getCatalogJanitor().scan(); report = services.getCatalogJanitor().getLastReport(); Assert.assertEquals(report.toString(), 3, report.getHoles().size()); MetaFixer fixer = new MetaFixer(services); - Assert.assertTrue(fixer.fixHoles(report)); + fixer.fixHoles(report); services.getCatalogJanitor().scan(); report = services.getCatalogJanitor().getLastReport(); Assert.assertTrue(report.toString(), report.isEmpty()); @@ -110,18 +110,53 @@ public class TestMetaFixer { List ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); services.getCatalogJanitor().scan(); - CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); - int originalCount = ris.size(); deleteRegion(services, ris.get(0)); services.getCatalogJanitor().scan(); - report = services.getCatalogJanitor().getLastReport(); + CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); Assert.assertTrue(ris.isEmpty()); MetaFixer fixer = new MetaFixer(services); - Assert.assertFalse(fixer.fixHoles(report)); + fixer.fixHoles(report); report = services.getCatalogJanitor().getLastReport(); Assert.assertTrue(report.isEmpty()); ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); Assert.assertEquals(0, ris.size()); } + + @Test + public void testOverlap() throws IOException { + TableName tn = TableName.valueOf(this.name.getMethodName()); + TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); + List ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); + MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); + services.getCatalogJanitor().scan(); + CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); + Assert.assertTrue(report.isEmpty()); + // Make a simple overlap spanning second and third region. + RegionInfo secondRegion = ris.get(1); + RegionInfo thirdRegion = ris.get(2); + RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(tn). + setStartKey(secondRegion.getStartKey()). + setEndKey(thirdRegion.getEndKey()). + build(); + MetaTableAccessor.putsToMetaTable(services.getConnection(), + Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion, + System.currentTimeMillis()))); + services.getAssignmentManager().assign(overlapRegion); + services.getCatalogJanitor().scan(); + report = services.getCatalogJanitor().getLastReport(); + Assert.assertEquals(2, report.getOverlaps().size()); + MetaFixer fixer = new MetaFixer(services); + fixer.fixOverlaps(report); + // Wait on fix to complete. + while (true) { + services.getCatalogJanitor().scan(); + report = services.getCatalogJanitor().getLastReport(); + if (report.isEmpty()) { + break; + } + Threads.sleep(10); + } + Assert.assertTrue(report.toString(), report.isEmpty()); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixerNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixerNoCluster.java new file mode 100644 index 0000000000..210ca1f7e9 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixerNoCluster.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Test small utility methods inside {@link MetaFixer}. + * For cluster tests see {@link TestMetaFixer} + */ +@Category({MasterTests.class, SmallTests.class}) +public class TestMetaFixerNoCluster { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMetaFixer.class); + private static byte [] A = Bytes.toBytes("a"); + private static byte [] B = Bytes.toBytes("b"); + private static byte [] C = Bytes.toBytes("c"); + private static byte [] D = Bytes.toBytes("d"); + private static RegionInfo ALL = RegionInfoBuilder.FIRST_META_REGIONINFO; + private static RegionInfo _ARI = RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setEndKey(A).build(); + private static RegionInfo _BRI = RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setEndKey(B).build(); + private static RegionInfo ABRI = RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setStartKey(A).setEndKey(B).build(); + private static RegionInfo ACRI = + org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setStartKey(A).setEndKey(C).build(); + private static RegionInfo BCRI = + org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setStartKey(B).setEndKey(C).build(); + private static RegionInfo CDRI = + org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setStartKey(C).setEndKey(D).build(); + private static RegionInfo ADRI = + org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setStartKey(A).setEndKey(D).build(); + private static RegionInfo D_RI = + org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setStartKey(D).build(); + private static RegionInfo C_RI = + org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME). + setStartKey(C).build(); + + @Test + public void testGetRegionInfoWithLargestEndKey() { + assertTrue(MetaFixer.getRegionInfoWithLargestEndKey(_ARI, _BRI).equals(_BRI)); + assertTrue(MetaFixer.getRegionInfoWithLargestEndKey(C_RI, D_RI).equals(C_RI)); + assertTrue(MetaFixer.getRegionInfoWithLargestEndKey(ABRI, CDRI).equals(CDRI)); + assertTrue(MetaFixer.getRegionInfoWithLargestEndKey(null, CDRI).equals(CDRI)); + assertTrue(MetaFixer.getRegionInfoWithLargestEndKey(null, null) == null); + } + + @Test + public void testIsOverlap() { + assertTrue(MetaFixer.isOverlap(_BRI, new Pair(ABRI, ACRI))); + assertFalse(MetaFixer.isOverlap(_ARI, new Pair(C_RI, D_RI))); + assertTrue(MetaFixer.isOverlap(ADRI, new Pair(CDRI, C_RI))); + assertFalse(MetaFixer.isOverlap(_BRI, new Pair(CDRI, C_RI))); + assertFalse(MetaFixer.isOverlap(_BRI, new Pair(CDRI, C_RI))); + } + + @Test + public void testCalculateMergesNoAggregation() { + List> overlaps = new ArrayList<>(); + overlaps.add(new Pair(_ARI, _BRI)); + overlaps.add(new Pair(C_RI, D_RI)); + List> merges = MetaFixer.calculateMerges(overlaps); + assertEquals(2, merges.size()); + assertEquals(2, merges.get(0).size()); + assertEquals(2, merges.get(1).size()); + } + + @Test + public void testCalculateMergesAggregation() { + List> overlaps = new ArrayList<>(); + overlaps.add(new Pair(ALL, D_RI)); + overlaps.add(new Pair(_ARI, _BRI)); + overlaps.add(new Pair(C_RI, D_RI)); + List> merges = MetaFixer.calculateMerges(overlaps); + assertEquals(1, merges.size()); + assertEquals(5, merges.get(0).size()); + } + + @Test + public void testCalculateMergesNoRepeatOfRegionNames() { + List> overlaps = new ArrayList<>(); + overlaps.add(new Pair(_BRI, ABRI)); + overlaps.add(new Pair(ABRI, ADRI)); + List> merges = MetaFixer.calculateMerges(overlaps); + assertEquals(1, merges.size()); + // There should be three regions to merge, not four. + assertEquals(3, merges.get(0).size()); + } +} -- 2.19.1