From 7e72aac764c15b510f21b5c02b644bb21dd54026 Mon Sep 17 00:00:00 2001 From: stack Date: Thu, 25 Jul 2019 23:16:50 -0700 Subject: [PATCH] HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page --- .../hadoop/hbase/master/CatalogJanitor.java | 62 +++++---- .../resources/hbase-webapps/master/hbck.jsp | 129 +++++++++++++++++- 2 files changed, 158 insertions(+), 33 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index ec0a21aee9..cbeecb3bc5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -30,6 +30,7 @@ import java.util.Properties; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; @@ -43,9 +44,12 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableState; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; @@ -244,7 +248,7 @@ public class CatalogJanitor extends ScheduledChore { * @return Returns last published Report that comes of last successful scan * of hbase:meta. 
*/ - Report getLastReport() { + public Report getLastReport() { return this.lastReport; } @@ -450,19 +454,22 @@ public class CatalogJanitor extends ScheduledChore { /** * Report made by {@link ReportMakingVisitor}. */ - static class Report { - private final long now = EnvironmentEdgeManager.currentTime(); + public static class Report { + public final long now = EnvironmentEdgeManager.currentTime(); // Keep Map of found split parents. These are candidates for cleanup. // Use a comparator that has split parents come before its daughters. - final Map splitParents = new TreeMap<>(new SplitParentFirstComparator()); - final Map mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR); - - final List> holes = new ArrayList<>(); - final List> overlaps = new ArrayList<>(); - final Map unknownServers = new HashMap(); - final List emptyRegionInfo = new ArrayList<>(); - int count = 0; + public final Map splitParents = + new TreeMap<>(new SplitParentFirstComparator()); + public final Map mergedRegions = + new TreeMap<>(RegionInfo.COMPARATOR); + + public final List> holes = new ArrayList<>(); + public final List> overlaps = new ArrayList<>(); + public final List> unknownServers = + new ArrayList>(); + public final List emptyRegionInfo = new ArrayList<>(); + public int count = 0; @VisibleForTesting Report() {} @@ -470,7 +477,7 @@ public class CatalogJanitor extends ScheduledChore { /** * @return True if an 'empty' lastReport -- no problems found. */ - boolean isEmpty() { + public boolean isEmpty() { return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() && this.emptyRegionInfo.isEmpty(); } @@ -498,12 +505,12 @@ public class CatalogJanitor extends ScheduledChore { } sb.append("empty=").append(Bytes.toString(r)); } - for (Map.Entry e: this.unknownServers.entrySet()) { + for (Pair p: this.unknownServers) { if (sb.length() > 0) { sb.append(", "); } - sb.append("unknown_server=").append(e.getKey()).append("/"). 
- append(e.getValue().getRegionNameAsString()); + sb.append("unknown_server=").append(p.getSecond()).append("/"). + append(Bytes.toString(p.getFirst().metaRow)); } return sb.toString(); } @@ -512,7 +519,7 @@ public class CatalogJanitor extends ScheduledChore { /** * Simple datastructure to hold a MetaRow content. */ - static class MetaRow { + public static class MetaRow { /** * A marker for use in case where there is a hole at the very * first row in hbase:meta. Should never happen. @@ -524,12 +531,12 @@ public class CatalogJanitor extends ScheduledChore { /** * Row from hbase:meta table. */ - final byte [] metaRow; + public final byte [] metaRow; /** * The decoded RegionInfo gotten from hbase:meta. */ - final RegionInfo regionInfo; + public final RegionInfo regionInfo; MetaRow(byte [] metaRow, RegionInfo regionInfo) { this.metaRow = metaRow; @@ -613,13 +620,14 @@ public class CatalogJanitor extends ScheduledChore { MetaTableAccessor.getRegionInfoColumn()); } else { ri = locations.getDefaultRegionLocation().getRegion(); - checkServer(locations); + checkServer(metaTableRow.getRow(), locations); } if (ri == null) { this.report.emptyRegionInfo.add(metaTableRow.getRow()); return ri; } + MetaRow mrri = new MetaRow(metaTableRow.getRow(), ri); // If table is disabled, skip integrity check. if (!isTableDisabled(ri)) { @@ -678,7 +686,7 @@ public class CatalogJanitor extends ScheduledChore { /** * Run through referenced servers and save off unknown and the dead. */ - private void checkServer(RegionLocations locations) { + private void checkServer(byte [] metaTableRow, RegionLocations locations) { if (this.services == null) { // Can't do this test if no services. return; @@ -694,10 +702,10 @@ public class CatalogJanitor extends ScheduledChore { } ServerManager.ServerLiveState state = this.services.getServerManager(). 
isServerKnownAndOnline(sn); - LOG.info("{} {}", sn, state); switch (state) { case UNKNOWN: - this.report.unknownServers.put(sn, location.getRegion()); + this.report.unknownServers.add( + new Pair(new MetaRow(metaTableRow, location.getRegion()), sn)); break; default: @@ -742,20 +750,22 @@ public class CatalogJanitor extends ScheduledChore { public static void main(String [] args) throws IOException { checkLog4jProperties(); ReportMakingVisitor visitor = new ReportMakingVisitor(null); - try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create())) { + Configuration configuration = HBaseConfiguration.create(); + configuration.setBoolean("hbase.defaults.for.version.skip", true); + try (Connection connection = ConnectionFactory.createConnection(configuration)) { /* Used to generate an overlap. - Get g = new Get(Bytes.toBytes("t2,40,1563939166317.5a8be963741d27e9649e5c67a34259d9.")); + */ + Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0.")); g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); try (Table t = connection.getTable(TableName.META_TABLE_NAME)) { Result r = t.get(g); byte [] row = g.getRow(); - row[row.length - 3] <<= ((byte)row[row.length -3]); + row[row.length - 2] <<= ((byte)row[row.length - 2]); Put p = new Put(g.getRow()); p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); t.put(p); } - */ MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null); Report report = visitor.getReport(); LOG.info(report != null? 
report.toString(): "empty"); diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp index efd06707f7..cd24234cba 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp @@ -18,14 +18,22 @@ */ --%> <%@ page contentType="text/html;charset=UTF-8" + import="java.time.Instant" + import="java.time.ZoneId" import="java.util.List" import="java.util.Map" import="java.util.stream.Collectors" + import="java.time.ZonedDateTime" + import="java.time.format.DateTimeFormatter" %> <%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %> <%@ page import="org.apache.hadoop.hbase.master.HMaster" %> <%@ page import="org.apache.hadoop.hbase.ServerName" %> +<%@ page import="org.apache.hadoop.hbase.util.Bytes" %> <%@ page import="org.apache.hadoop.hbase.util.Pair" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.MetaRow" %> <% HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER); pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName()); @@ -38,6 +46,8 @@ orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS(); orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS(); } + CatalogJanitor cj = master.getCatalogJanitor(); + CatalogJanitor.Report report = cj == null? null: cj.getLastReport(); %> @@ -54,22 +64,127 @@ <% } else { %> +
+ +
+ <% if (report != null && !report.isEmpty()) { + ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(report.now), + ZoneId.systemDefault()); + String iso8601 = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + %> +

Report created: <%= iso8601 %> (Run catalogjanitor_run in hbase shell to generate a new report).

+ <% if (!report.holes.isEmpty()) { %> +
+ +
+ + + + + + + + <% for (Pair p : report.holes) { %> + + + + + + + <% } %> + +

<%= report.holes.size() %> hole(s).

+
Row before holeRegionInfoRow after holeRegionInfo
<%= Bytes.toString(p.getFirst().metaRow) %><%= p.getFirst().regionInfo %><%= Bytes.toString(p.getSecond().metaRow) %><%= p.getSecond().regionInfo %>
+ <% } %> + <% if (!report.overlaps.isEmpty()) { %> +
+ +
+ + + + + + + + <% for (Pair p : report.overlaps) { %> + + + + + + + <% } %> + +

<%= report.overlaps.size() %> overlap(s).

+
RowRegionInfoOther RowOther RegionInfo
<%= Bytes.toString(p.getFirst().metaRow) %><%= p.getFirst().regionInfo %><%= Bytes.toString(p.getSecond().metaRow) %><%= p.getSecond().regionInfo %>
+ <% } %> + <% if (!report.unknownServers.isEmpty()) { %> +
+ +
+ + + + + + + <% for (Pair p: report.unknownServers) { %> + + + + + + <% } %> + +

<%= report.unknownServers.size() %> unknown server(s).

+
RowServerNameRegionInfo
<%= Bytes.toString(p.getFirst().metaRow) %><%= p.getSecond() %><%= p.getFirst().regionInfo %>
+ <% } %> + <% if (!report.emptyRegionInfo.isEmpty()) { %> +
+ +
+ + + + + <% for (byte [] row: report.emptyRegionInfo) { %> + + + + <% } %> + +

<%= report.emptyRegionInfo.size() %> emptyRegionInfo(s).

+
Row
<%= Bytes.toString(row) %>
+ <% } %> + <% } %> +
+
+ + <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>

- There are three case: 1. Master thought this region opened, but no regionserver reported it. - 2. Master thought this region opened on Server1, but regionserver reported Server2. + There are three cases: 1. Master thought this region opened, but no regionserver reported it; + 2. Master thought this region opened on Server1, but regionserver reported Server2; 3. More than one regionservers reported opened this region. Notice: the reported online regionservers may be not right when there are regions in transition. Please check them in regionserver's web UI.

- - - - <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %> @@ -136,4 +251,4 @@ <% } %> - \ No newline at end of file + -- 2.19.1
Region