Index: src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (revision 1127186) +++ src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (working copy) @@ -114,7 +114,12 @@ /** * Started in on the creation of the second daughter region. */ - STARTED_REGION_B_CREATION + STARTED_REGION_B_CREATION, + /** + * Point of no return. + * If we got here, then transaction is not recoverable. + */ + PONR } /* @@ -243,20 +248,20 @@ this.journal.add(JournalEntry.STARTED_REGION_B_CREATION); HRegion b = createDaughterRegion(this.hri_b, this.parent.flushRequester); + + // This is the point of no return. Adding edits to .META. can fail in + // various interesting ways the most interesting of which is a timeout + // BUT the edits all went through (See HBASE-3872). + this.journal.add(JournalEntry.PONR); + + // TODO: Could we be smarter about the sequence in which we do these steps? + // Edit parent in meta if (!testing) { MetaEditor.offlineParentInMeta(server.getCatalogTracker(), this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo()); } - // The is the point of no return. We are committed to the split now. We - // have still the daughter regions to open but meta has been changed. - // If we fail from here on out, we can not rollback so, we'll just abort. - // The meta has been changed though so there will need to be a fixup run - // during processing of the crashed server by master (TODO: Verify this in place). - - // TODO: Could we be smarter about the sequence in which we do these steps? - if (!testing) { // Open daughters in parallel. DaughterOpener aOpener = new DaughterOpener(server, services, a); @@ -267,7 +272,8 @@ aOpener.join(); bOpener.join(); } catch (InterruptedException e) { - server.abort("Exception running daughter opens", e); + Thread.currentThread().interrupt(); + throw new IOException("Interrupted " + e.getMessage()); } } @@ -532,8 +538,11 @@ * by unit tests. * @return The region we were splitting * @throws IOException If thrown, rollback failed. Take drastic action. + * @return True if we successfully rolled back, false if we got to the point + * of no return and so now need to abort the server to minimize damage. */ - public void rollback(final OnlineRegions or) throws IOException { + public boolean rollback(final OnlineRegions or) throws IOException { + boolean result = true; FileSystem fs = this.parent.getFilesystem(); ListIterator iterator = this.journal.listIterator(this.journal.size()); @@ -568,10 +577,16 @@ if (or != null) or.addToOnlineRegions(this.parent); break; + case PONR: + // We got to the point-of-no-return so we need to abort after cleanup + result = false; + break; + default: throw new RuntimeException("Unhandled journal entry: " + je); } } + return result; } HRegionInfo getFirstDaughter() { Index: src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java (revision 1127186) +++ src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java (working copy) @@ -156,16 +156,20 @@ st.execute(this.server, this.server); } catch (IOException ioe) { try { - LOG.info("Running rollback of failed split of " + + LOG.info("Running rollback/cleanup of failed split of " + parent.getRegionNameAsString() + "; " + ioe.getMessage()); - st.rollback(this.server); - LOG.info("Successful rollback of failed split of " + - parent.getRegionNameAsString()); + if (st.rollback(this.server)) { + LOG.info("Successful rollback of failed split of " + + parent.getRegionNameAsString()); + } else { + this.server.abort("Abort; we got an error after point-of-no-return"); + } } catch (Exception ee) { + String msg = "Failed rollback of failed split of " + + parent.getRegionNameAsString() + " -- aborting server"; // If failed rollback, kill this server to avoid having a hole in table. - LOG.info("Failed rollback of failed split of " + - parent.getRegionNameAsString() + " -- aborting server", ee); - this.server.abort("Failed split"); + LOG.info(msg, ee); + this.server.abort(msg); } return; } Index: src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (revision 1127186) +++ src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (working copy) @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Writables; /** @@ -161,11 +162,11 @@ throws IOException { boolean result = false; // Run checks on each daughter split. - boolean hasReferencesA = + Pair a = checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER); - boolean hasReferencesB = + Pair b = checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER); - if (!hasReferencesA && !hasReferencesB) { + if ((a.getFirst() && !a.getSecond()) && (b.getFirst() && !b.getSecond())) { LOG.debug("Deleting region " + parent.getRegionNameAsString() + " because daughter splits no longer hold references"); // This latter regionOffline should not be necessary but is done for now @@ -192,14 +193,26 @@ * @param parent * @param rowContent * @param qualifier - * @return True if this daughter still has references to the parent. + * @return A pair where the first boolean says whether or not the daughter + * region directory exists in the filesystem and then the second boolean says + * whether the daughter has references to the parent. * @throws IOException */ - boolean checkDaughter(final HRegionInfo parent, + Pair checkDaughter(final HRegionInfo parent, final Result rowContent, final byte [] qualifier) throws IOException { HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier); - return hasReferences(parent, rowContent, hri, qualifier); + Pair result = + checkDaughterInFs(parent, rowContent, hri, qualifier); + if (result.getFirst() && !result.getSecond()) { + // Remove daughter from the parent IFF the daughter region exists in FS. + // If there is no daughter region in the filesystem, must be because of + // a failed split. The ServerShutdownHandler will do the fixup. Don't + // do any deletes in here that could intefere with ServerShutdownHandler + // fixup + removeDaughterFromParent(parent, hri, qualifier); + } + return result; } /** @@ -237,23 +250,35 @@ /** * Checks if a daughter region -- either splitA or splitB -- still holds * references to parent. If not, removes reference to the split from - * the parent meta region row so we don't check it any more. + * the parent meta region row so we don't check it any more. Also checks + * daughter region exists in the filesytem. * @param parent Parent region name. * @param rowContent Keyed content of the parent row in meta region. * @param split Which column family. * @param qualifier Which of the daughters to look at, splitA or splitB. - * @return True if still has references to parent. + * @return A pair where the first boolean says whether or not the daughter + * region directory exists in the filesystem and then the second boolean says + * whether the daughter has references to the parent. * @throws IOException */ - boolean hasReferences(final HRegionInfo parent, + Pair checkDaughterInFs(final HRegionInfo parent, final Result rowContent, final HRegionInfo split, final byte [] qualifier) throws IOException { - boolean result = false; - if (split == null) return result; + boolean references = false; + boolean exists = false; + if (split == null) { + return new Pair(Boolean.FALSE, Boolean.FALSE); + } FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); Path rootdir = this.services.getMasterFileSystem().getRootDir(); Path tabledir = new Path(rootdir, split.getTableDesc().getNameAsString()); + Path regiondir = new Path(tabledir, split.getEncodedName()); + exists = fs.exists(regiondir); + if (!exists) { + LOG.warn("Daughter regiondir does not exist: " + regiondir.toString()); + return new Pair(exists, Boolean.FALSE); + } for (HColumnDescriptor family: split.getTableDesc().getFamilies()) { Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(), family.getName()); @@ -267,13 +292,11 @@ ); if (ps != null && ps.length > 0) { - result = true; + references = true; break; } } - if (!result) { - removeDaughterFromParent(parent, split, qualifier); - } - return result; + return new Pair(Boolean.valueOf(exists), + Boolean.valueOf(references)); } } Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1127186) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -22,8 +22,6 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.io.EOFException; -import java.net.ConnectException; import java.lang.Thread.UncaughtExceptionHandler; import java.util.ArrayList; import java.util.HashMap; @@ -55,9 +53,10 @@ import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.catalog.RootLocationEditor; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.executor.RegionTransitionData; -import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan; import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler; import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler; @@ -68,9 +67,9 @@ import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData; import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; -import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData; import org.apache.hadoop.io.Writable; import org.apache.hadoop.ipc.RemoteException; import org.apache.zookeeper.AsyncCallback; Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1127186) +++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -216,11 +216,12 @@ */ static void fixupDaughters(final Result result, final AssignmentManager assignmentManager, - final CatalogTracker catalogTracker) throws IOException { + final CatalogTracker catalogTracker) + throws IOException { fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager, - catalogTracker); + catalogTracker); fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager, - catalogTracker); + catalogTracker); } /** @@ -261,8 +262,8 @@ } /** - * Look for presence of the daughter OR of a split of the daughter. Daughter - * could have been split over on regionserver before a run of the + * Look for presence of the daughter OR of a split of the daughter in .META. + * Daughter could have been split over on regionserver before a run of the * catalogJanitor had chance to clear reference from parent. * @param daughter Daughter region to search for. * @throws IOException @@ -324,4 +325,4 @@ return false; } } -} +} \ No newline at end of file