From da57f3a9a87d5b3ede784c2e699af112b10c1284 Mon Sep 17 00:00:00 2001 From: stack Date: Mon, 29 Jul 2019 17:10:58 -0700 Subject: [PATCH] HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page Signed-off-by: huzheng Signed-off-by: Guanghao Zhang --- .../hadoop/hbase/master/CatalogJanitor.java | 68 ++++---- .../resources/hbase-webapps/master/hbck.jsp | 145 ++++++++++++++++-- 2 files changed, 175 insertions(+), 38 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index 37108d542c..adbb5937f2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -30,6 +30,7 @@ import java.util.Properties; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; @@ -43,8 +44,11 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableState; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; @@ -240,7 +244,7 @@ public class CatalogJanitor extends ScheduledChore { * @return Returns last published Report that comes of last successful scan * of hbase:meta. 
*/ - Report getLastReport() { + public Report getLastReport() { return this.lastReport; } @@ -446,19 +450,21 @@ public class CatalogJanitor extends ScheduledChore { /** * Report made by {@link ReportMakingVisitor}. */ - static class Report { - private final long now = EnvironmentEdgeManager.currentTime(); + public static class Report { + public final long now = EnvironmentEdgeManager.currentTime(); // Keep Map of found split parents. These are candidates for cleanup. // Use a comparator that has split parents come before its daughters. - final Map splitParents = new TreeMap<>(new SplitParentFirstComparator()); - final Map mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR); + public final Map splitParents = + new TreeMap<>(new SplitParentFirstComparator()); + public final Map mergedRegions = + new TreeMap<>(RegionInfo.COMPARATOR); - final List> holes = new ArrayList<>(); - final List> overlaps = new ArrayList<>(); - final Map unknownServers = new HashMap(); - final List emptyRegionInfo = new ArrayList<>(); - int count = 0; + public final List> holes = new ArrayList<>(); + public final List> overlaps = new ArrayList<>(); + public final List> unknownServers = new ArrayList<>(); + public final List emptyRegionInfo = new ArrayList<>(); + public int count = 0; @VisibleForTesting Report() {} @@ -466,7 +472,7 @@ public class CatalogJanitor extends ScheduledChore { /** * @return True if an 'empty' lastReport -- no problems found. 
*/ - boolean isEmpty() { + public boolean isEmpty() { return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() && this.emptyRegionInfo.isEmpty(); } @@ -478,28 +484,28 @@ public class CatalogJanitor extends ScheduledChore { if (sb.length() > 0) { sb.append(", "); } - sb.append("hole=" + Bytes.toString(p.getFirst().metaRow) + "/" + - Bytes.toString(p.getSecond().metaRow)); + sb.append("hole=" + Bytes.toStringBinary(p.getFirst().metaRow) + "/" + + Bytes.toStringBinary(p.getSecond().metaRow)); } for (Pair p: this.overlaps) { if (sb.length() > 0) { sb.append(", "); } - sb.append("overlap=").append(Bytes.toString(p.getFirst().metaRow)).append("/"). - append(Bytes.toString(p.getSecond().metaRow)); + sb.append("overlap=").append(Bytes.toStringBinary(p.getFirst().metaRow)).append("/"). + append(Bytes.toStringBinary(p.getSecond().metaRow)); } for (byte [] r: this.emptyRegionInfo) { if (sb.length() > 0) { sb.append(", "); } - sb.append("empty=").append(Bytes.toString(r)); + sb.append("empty=").append(Bytes.toStringBinary(r)); } - for (Map.Entry e: this.unknownServers.entrySet()) { + for (Pair p: this.unknownServers) { if (sb.length() > 0) { sb.append(", "); } - sb.append("unknown_server=").append(e.getKey()).append("/"). - append(e.getValue().getRegionNameAsString()); + sb.append("unknown_server=").append(p.getSecond()).append("/"). + append(Bytes.toStringBinary(p.getFirst().metaRow)); } return sb.toString(); } @@ -508,7 +514,7 @@ public class CatalogJanitor extends ScheduledChore { /** * Simple datastructure to hold a MetaRow content. */ - static class MetaRow { + public static class MetaRow { /** * A marker for use in case where there is a hole at the very * first row in hbase:meta. Should never happen. @@ -519,12 +525,12 @@ public class CatalogJanitor extends ScheduledChore { /** * Row from hbase:meta table. */ - final byte [] metaRow; + public final byte [] metaRow; /** * The decoded RegionInfo gotten from hbase:meta. 
*/ - final RegionInfo regionInfo; + public final RegionInfo regionInfo; MetaRow(byte [] metaRow, RegionInfo regionInfo) { this.metaRow = metaRow; @@ -608,13 +614,14 @@ public class CatalogJanitor extends ScheduledChore { MetaTableAccessor.getRegionInfoColumn()); } else { ri = locations.getDefaultRegionLocation().getRegion(); - checkServer(locations); + checkServer(metaTableRow.getRow(), locations); } if (ri == null) { this.report.emptyRegionInfo.add(metaTableRow.getRow()); return ri; } + MetaRow mrri = new MetaRow(metaTableRow.getRow(), ri); // If table is disabled, skip integrity check. if (!isTableDisabled(ri)) { @@ -673,7 +680,7 @@ public class CatalogJanitor extends ScheduledChore { /** * Run through referenced servers and save off unknown and the dead. */ - private void checkServer(RegionLocations locations) { + private void checkServer(byte [] metaTableRow, RegionLocations locations) { if (this.services == null) { // Can't do this test if no services. return; @@ -691,7 +698,8 @@ public class CatalogJanitor extends ScheduledChore { isServerKnownAndOnline(sn); switch (state) { case UNKNOWN: - this.report.unknownServers.put(sn, location.getRegion()); + this.report.unknownServers.add( + new Pair(new MetaRow(metaTableRow, location.getRegion()), sn)); break; default: @@ -736,20 +744,22 @@ public class CatalogJanitor extends ScheduledChore { public static void main(String [] args) throws IOException { checkLog4jProperties(); ReportMakingVisitor visitor = new ReportMakingVisitor(null); - try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create())) { + Configuration configuration = HBaseConfiguration.create(); + configuration.setBoolean("hbase.defaults.for.version.skip", true); + try (Connection connection = ConnectionFactory.createConnection(configuration)) { /* Used to generate an overlap. 
- Get g = new Get(Bytes.toBytes("t2,40,1563939166317.5a8be963741d27e9649e5c67a34259d9.")); + */ + Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0.")); g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); try (Table t = connection.getTable(TableName.META_TABLE_NAME)) { Result r = t.get(g); byte [] row = g.getRow(); - row[row.length - 3] <<= ((byte)row[row.length -3]); + row[row.length - 2] <<= ((byte)row[row.length - 2]); Put p = new Put(g.getRow()); p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); t.put(p); } - */ MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null); Report report = visitor.getReport(); LOG.info(report != null? report.toString(): "empty"); diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp index 0245d4771d..0c5f8a0fef 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp @@ -18,15 +18,23 @@ */ --%> <%@ page contentType="text/html;charset=UTF-8" + import="java.time.Instant" + import="java.time.ZoneId" import="java.util.Date" import="java.util.List" import="java.util.Map" import="java.util.stream.Collectors" + import="java.time.ZonedDateTime" + import="java.time.format.DateTimeFormatter" %> <%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %> <%@ page import="org.apache.hadoop.hbase.master.HMaster" %> <%@ page import="org.apache.hadoop.hbase.ServerName" %> +<%@ page import="org.apache.hadoop.hbase.util.Bytes" %> <%@ page import="org.apache.hadoop.hbase.util.Pair" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.MetaRow" %> <% HMaster master = 
(HMaster) getServletContext().getAttribute(HMaster.MASTER); pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName()); @@ -43,6 +51,14 @@ startTimestamp = hbckChecker.getCheckingStartTimestamp(); endTimestamp = hbckChecker.getCheckingEndTimestamp(); } + ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTimestamp), + ZoneId.systemDefault()); + String iso8601start = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(endTimestamp), + ZoneId.systemDefault()); + String iso8601end = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + CatalogJanitor cj = master.getCatalogJanitor(); + CatalogJanitor.Report report = cj == null? null: cj.getLastReport(); %> @@ -61,29 +77,32 @@
+
+
+ + <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>

- There are three case: 1. Master thought this region opened, but no regionserver reported it. - 2. Master thought this region opened on Server1, but regionserver reported Server2. - 3. More than one regionservers reported opened this region. + There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assigns + command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix: + check whether the server still exists. If not, schedule an SCP for it. If it exists, restart Server2 and Server1); + 3. More than one regionserver reported this region as opened (Fix: restart the RegionServers). Notice: the reported online regionservers may be not right when there are regions in transition. Please check them in regionserver's web UI.

- - - <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %> @@ -147,7 +166,115 @@
Region
<% } %> +
+ +
+ <% if (report != null && !report.isEmpty()) { + zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(report.now), + ZoneId.systemDefault()); + String iso8601reportTime = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(System.currentTimeMillis()), + ZoneId.systemDefault()); + String iso8601Now = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + %> +

Report created: <%= iso8601reportTime %> (now=<%= iso8601Now %>). Run catalogjanitor_run in hbase shell to generate a new sub-report.

+ <% if (!report.holes.isEmpty()) { %> +
+ +
+ + + + + + + + <% for (Pair p : report.holes) { %> + + + + + + + <% } %> + +

<%= report.holes.size() %> hole(s).

+
Row before holeRegionInfoRow after holeRegionInfo
<%= Bytes.toStringBinary(p.getFirst().metaRow) %><%= p.getFirst().regionInfo %><%= Bytes.toStringBinary(p.getSecond().metaRow) %><%= p.getSecond().regionInfo %>
+ <% } %> + <% if (!report.overlaps.isEmpty()) { %> +
+ +
+ + + + + + + + <% for (Pair p : report.overlaps) { %> + + + + + + + <% } %> + +

<%= report.overlaps.size() %> overlap(s).

+
RowRegionInfoOther RowOther RegionInfo
<%= Bytes.toStringBinary(p.getFirst().metaRow) %><%= p.getFirst().regionInfo %><%= Bytes.toStringBinary(p.getSecond().metaRow) %><%= p.getSecond().regionInfo %>
+ <% } %> + <% if (!report.unknownServers.isEmpty()) { %> +
+ +
+ + + + + + + <% for (Pair p: report.unknownServers) { %> + + + + + + <% } %> + +

<%= report.unknownServers.size() %> unknown server(s).

+
RowServerNameRegionInfo
<%= Bytes.toStringBinary(p.getFirst().metaRow) %><%= p.getSecond() %><%= p.getFirst().regionInfo %>
+ <% } %> + <% if (!report.emptyRegionInfo.isEmpty()) { %> +
+ +
+ + + + + <% for (byte [] row: report.emptyRegionInfo) { %> + + + + <% } %> + +

<%= report.emptyRegionInfo.size() %> emptyRegionInfo(s).

+
Row
<%= Bytes.toStringBinary(row) %>
+ <% } %> + <% } %> + <% } %> - \ No newline at end of file + -- 2.19.1