Index: src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (revision 1140971) +++ src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (working copy) @@ -60,7 +60,7 @@ /** * Executes region split as a "transaction". Call {@link #prepare()} to setup - * the transaction, {@link #execute(OnlineRegions)} to run the transaction and + * the transaction, {@link #execute(Server, RegionServerServices)} to run the transaction and * {@link #rollback(OnlineRegions)} to cleanup if execute fails. * *

Here is an example of how you would use this class: @@ -68,10 +68,10 @@ * SplitTransaction st = new SplitTransaction(this.conf, parent, midKey) * if (!st.prepare()) return; * try { - * st.execute(myOnlineRegions); + * st.execute(server, services); * } catch (IOException ioe) { * try { - * st.rollback(myOnlineRegions); + * st.rollback(server, services); * return; * } catch (RuntimeException e) { * myAbortable.abort("Failed split, abort"); @@ -127,7 +127,12 @@ /** * Started in on the creation of the second daughter region. */ - STARTED_REGION_B_CREATION + STARTED_REGION_B_CREATION, + /** + * Point of no return. + * If we got here, then transaction is not recoverable. + */ + PONR } /* @@ -284,9 +289,13 @@ this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo()); } - // This is the point of no return. We are committed to the split now. We - // have still the daughter regions to open but meta has been changed. - // If we fail from here on out, we cannot rollback so, we'll just abort. + // This is the point of no return. Adding edits to .META. can fail in + // various interesting ways the most interesting of which is a timeout + // BUT the edits all went through (See HBASE-3872). + this.journal.add(JournalEntry.PONR); + + // TODO: Could we be smarter about the sequence in which we do these steps? + if (!testing) { // Open daughters in parallel. DaughterOpener aOpener = new DaughterOpener(server, services, a); @@ -297,7 +306,8 @@ aOpener.join(); bOpener.join(); } catch (InterruptedException e) { - server.abort("Exception running daughter opens", e); + Thread.currentThread().interrupt(); + throw new IOException("Interrupted " + e.getMessage()); } } @@ -598,13 +608,15 @@ } /** - * @param or Object that can online/offline parent region. Can be passed null - * by unit tests. - * @return The region we were splitting + * @param server Hosting server instance. + * @param services * @throws IOException If thrown, rollback failed. Take drastic action. + * @return True if we successfully rolled back, false if we got to the point + * of no return and so now need to abort the server to minimize damage. */ - public void rollback(final Server server, final OnlineRegions or) + public boolean rollback(final Server server, final RegionServerServices services) throws IOException { + boolean result = true; FileSystem fs = this.parent.getFilesystem(); ListIterator iterator = this.journal.listIterator(this.journal.size()); @@ -642,13 +654,19 @@ break; case OFFLINED_PARENT: - if (or != null) or.addToOnlineRegions(this.parent); + services.addToOnlineRegions(this.parent); break; + case PONR: + // We got to the point-of-no-return so we need to abort after cleanup + result = false; + break; + default: throw new RuntimeException("Unhandled journal entry: " + je); } } + return result; } HRegionInfo getFirstDaughter() { Index: src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java (revision 1140971) +++ src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java (working copy) @@ -62,15 +62,20 @@ st.execute(this.server, this.server); } catch (Exception e) { try { - LOG.info("Running rollback of failed split of " + parent + "; " - + e.getMessage()); - st.rollback(this.server, this.server); - LOG.info("Successful rollback of failed split of " + parent); + LOG.info("Running rollback/cleanup of failed split of " + + parent.getRegionNameAsString() + "; " + e.getMessage()); + if (st.rollback(this.server, this.server)) { + LOG.info("Successful rollback of failed split of " + + parent.getRegionNameAsString()); + } else { + this.server.abort("Abort; we got an error after point-of-no-return"); + } } catch (RuntimeException ee) { - // If failed rollback, kill server to avoid having a hole in table. - LOG.info("Failed rollback of failed split of " - + parent.getRegionNameAsString() + " -- aborting server", ee); - this.server.abort("Failed split"); + String msg = "Failed rollback of failed split of " + + parent.getRegionNameAsString() + " -- aborting server"; + // If failed rollback, kill this server to avoid having a hole in table. + LOG.info(msg, ee); + this.server.abort(msg); } return; } Index: src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (revision 1140971) +++ src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (working copy) @@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; @@ -166,11 +167,11 @@ throws IOException { boolean result = false; // Run checks on each daughter split. - boolean hasReferencesA = + Pair a = checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER); - boolean hasReferencesB = + Pair b = checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER); - if (!hasReferencesA && !hasReferencesB) { + if ((a.getFirst() && !a.getSecond()) && (b.getFirst() && !b.getSecond())) { LOG.debug("Deleting region " + parent.getRegionNameAsString() + " because daughter splits no longer hold references"); // This latter regionOffline should not be necessary but is done for now @@ -197,14 +198,26 @@ * @param parent * @param rowContent * @param qualifier - * @return True if this daughter still has references to the parent. + * @return A pair where the first boolean says whether or not the daughter + * region directory exists in the filesystem and then the second boolean says + * whether the daughter has references to the parent. * @throws IOException */ - boolean checkDaughter(final HRegionInfo parent, + Pair checkDaughter(final HRegionInfo parent, final Result rowContent, final byte [] qualifier) throws IOException { HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier); - return hasReferences(parent, rowContent, hri, qualifier); + Pair result = + checkDaughterInFs(parent, rowContent, hri, qualifier); + if (result.getFirst() && !result.getSecond()) { + // Remove daughter from the parent IFF the daughter region exists in FS. + // If there is no daughter region in the filesystem, must be because of + // a failed split. The ServerShutdownHandler will do the fixup. Don't + // do any deletes in here that could intefere with ServerShutdownHandler + // fixup + removeDaughterFromParent(parent, hri, qualifier); + } + return result; } /** @@ -242,23 +255,35 @@ /** * Checks if a daughter region -- either splitA or splitB -- still holds * references to parent. If not, removes reference to the split from - * the parent meta region row so we don't check it any more. + * the parent meta region row so we don't check it any more. Also checks + * daughter region exists in the filesytem. * @param parent Parent region name. * @param rowContent Keyed content of the parent row in meta region. * @param split Which column family. * @param qualifier Which of the daughters to look at, splitA or splitB. - * @return True if still has references to parent. + * @return A pair where the first boolean says whether or not the daughter + * region directory exists in the filesystem and then the second boolean says + * whether the daughter has references to the parent. * @throws IOException */ - boolean hasReferences(final HRegionInfo parent, + Pair checkDaughterInFs(final HRegionInfo parent, final Result rowContent, final HRegionInfo split, final byte [] qualifier) throws IOException { - boolean result = false; - if (split == null) return result; + boolean references = false; + boolean exists = false; + if (split == null) { + return new Pair(Boolean.FALSE, Boolean.FALSE); + } FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); Path rootdir = this.services.getMasterFileSystem().getRootDir(); Path tabledir = new Path(rootdir, split.getTableNameAsString()); + Path regiondir = new Path(tabledir, split.getEncodedName()); + exists = fs.exists(regiondir); + if (!exists) { + LOG.warn("Daughter regiondir does not exist: " + regiondir.toString()); + return new Pair(exists, Boolean.FALSE); + } HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableName()); for (HColumnDescriptor family: parentDescriptor.getFamilies()) { @@ -275,18 +300,16 @@ ); if (ps != null && ps.length > 0) { - result = true; + references = true; break; } } - if (!result) { - removeDaughterFromParent(parent, split, qualifier); - } - return result; + return new Pair(Boolean.valueOf(exists), + Boolean.valueOf(references)); } private HTableDescriptor getTableDescriptor(byte[] tableName) throws TableExistsException, FileNotFoundException, IOException { return this.services.getTableDescriptors().get(Bytes.toString(tableName)); } -} \ No newline at end of file +} Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1140971) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -68,6 +68,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData; import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.io.Writable; Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1140971) +++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -236,11 +236,12 @@ */ static void fixupDaughters(final Result result, final AssignmentManager assignmentManager, - final CatalogTracker catalogTracker) throws IOException { + final CatalogTracker catalogTracker) + throws IOException { fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager, - catalogTracker); + catalogTracker); fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager, - catalogTracker); + catalogTracker); } /** @@ -281,8 +282,8 @@ } /** - * Look for presence of the daughter OR of a split of the daughter. Daughter - * could have been split over on regionserver before a run of the + * Look for presence of the daughter OR of a split of the daughter in .META. + * Daughter could have been split over on regionserver before a run of the * catalogJanitor had chance to clear reference from parent. * @param daughter Daughter region to search for. * @throws IOException @@ -344,4 +345,4 @@ return false; } } -} +} \ No newline at end of file