Summary of this patch (free text before the first file header; skipped by patch tools):
  * HMaster: adds fixupDaughters(), called after the failover-processing branch
    and before the balancer chore is started. It full-scans .META. with a
    visitor that collects rows whose HRegionInfo is both offline and split,
    then hands each such parent row to ServerShutdownHandler.fixupDaughters()
    and logs the total when it is non-zero.
  * ServerShutdownHandler: fixupDaughters() becomes public and returns the
    number of daughters fixed; fixupDaughter() returns 1 when it re-added and
    assigned a missing daughter, otherwise 0.
  * CompactSplitThread: the failed-split log call now also passes the
    IOException object, not only its message.
NOTE(review): the type arguments on Map/HashMap/Map.Entry were restored as
  HRegionInfo, Result (the types passed to offlineSplitParents.put); without
  them e.getValue() is an Object and cannot be passed as a Result.
NOTE(review): leading whitespace inside the hunks was reconstructed assuming
  2-space indentation; confirm against the target tree, or apply with
  whitespace differences ignored.

diff --git src/main/java/org/apache/hadoop/hbase/master/HMaster.java src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 3c7b68d..ead030a 100644
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -25,6 +25,7 @@ import java.lang.reflect.InvocationTargetException;
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;
@@ -67,6 +68,7 @@ import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
 import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
+import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
 import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
@@ -396,6 +398,9 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
       this.assignmentManager.processFailover();
     }
 
+    // Fixing up missing daughters if any
+    fixupDaughters();
+
     // Start balancer and meta catalog janitor after meta and regions have
     // been assigned.
     this.balancerChore = getAndStartBalancerChore(this);
@@ -448,6 +453,37 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
     return assigned;
   }
 
+  void fixupDaughters() throws IOException {
+    final Map<HRegionInfo, Result> offlineSplitParents =
+      new HashMap<HRegionInfo, Result>();
+    // This visitor collects offline split parents in the .META. table
+    MetaReader.Visitor visitor = new MetaReader.Visitor() {
+      @Override
+      public boolean visit(Result r) throws IOException {
+        if (r == null || r.isEmpty()) return true;
+        HRegionInfo info = CatalogJanitor.getHRegionInfo(r);
+        if (info == null) return true; // Keep scanning
+        if (info.isOffline() && info.isSplit()) {
+          offlineSplitParents.put(info, r);
+        }
+        // Returning true means "keep scanning"
+        return true;
+      }
+    };
+    // Run full scan of .META. catalog table passing in our custom visitor
+    MetaReader.fullScan(this.catalogTracker, visitor);
+    // Now work on our list of found parents. See if any we can clean up.
+    int fixups = 0;
+    for (Map.Entry<HRegionInfo, Result> e : offlineSplitParents.entrySet()) {
+      fixups += ServerShutdownHandler.fixupDaughters(
+          e.getValue(), assignmentManager, catalogTracker);
+    }
+    if (fixups != 0) {
+      LOG.info("Scanned the catalog and fixed up " + fixups +
+        " missing daughter region(s)");
+    }
+  }
+
   /*
    * @return This masters' address.
    * @throws UnknownHostException
diff --git src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
index 466f237..9eafc69 100644
--- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
+++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
@@ -249,30 +249,33 @@ public class ServerShutdownHandler extends EventHandler {
    * Check that daughter regions are up in .META. and if not, add them.
    * @param hris All regions for this server in meta.
    * @param result The contents of the parent row in .META.
+   * @return the number of daughters missing and fixed
    * @throws IOException
    */
-  static void fixupDaughters(final Result result,
+  public static int fixupDaughters(final Result result,
       final AssignmentManager assignmentManager,
       final CatalogTracker catalogTracker)
   throws IOException {
-    fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager,
-      catalogTracker);
-    fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager,
-      catalogTracker);
+    int fixedA = fixupDaughter(result, HConstants.SPLITA_QUALIFIER,
+      assignmentManager, catalogTracker);
+    int fixedB = fixupDaughter(result, HConstants.SPLITB_QUALIFIER,
+      assignmentManager, catalogTracker);
+    return fixedA + fixedB;
   }
 
   /**
    * Check individual daughter is up in .META.; fixup if its not.
    * @param result The contents of the parent row in .META.
    * @param qualifier Which daughter to check for.
+   * @return 1 if the daughter is missing and fixed. Otherwise 0
    * @throws IOException
    */
-  static void fixupDaughter(final Result result, final byte [] qualifier,
+  static int fixupDaughter(final Result result, final byte [] qualifier,
       final AssignmentManager assignmentManager,
       final CatalogTracker catalogTracker)
   throws IOException {
     HRegionInfo daughter = getHRegionInfo(result, qualifier);
-    if (daughter == null) return;
+    if (daughter == null) return 0;
     if (isDaughterMissing(catalogTracker, daughter)) {
       LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
       MetaEditor.addDaughter(catalogTracker, daughter, null);
@@ -283,9 +286,11 @@ public class ServerShutdownHandler extends EventHandler {
 
       // And assign it.
       assignmentManager.assign(daughter, true);
+      return 1;
     } else {
       LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
     }
+    return 0;
   }
 
   /**
diff --git src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
index cfbe6c8..b3cc46a 100644
--- src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
+++ src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
@@ -157,7 +157,7 @@ public class CompactSplitThread extends Thread implements CompactionRequestor {
     } catch (IOException ioe) {
       try {
         LOG.info("Running rollback/cleanup of failed split of "
-          + parent.getRegionNameAsString() + "; " + ioe.getMessage());
+          + parent.getRegionNameAsString() + "; " + ioe.getMessage(), ioe);
         if (st.rollback(this.server, this.server)) {
           LOG.info("Successful rollback of failed split of "
             + parent.getRegionNameAsString());