Index: src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java --- src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java 2011-10-17 10:01:25.000000000 +0800 +++ src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java 2011-10-17 10:02:26.000000000 +0800 @@ -254,19 +254,29 @@ this.journal.add(JournalEntry.STARTED_REGION_B_CREATION); HRegion b = createDaughterRegion(this.hri_b, this.parent.flushRequester); + // This is the point of no return. Adding subsequent edits to .META. as we + // do below when we do the daughter opens adding each to .META. can fail in + // various interesting ways the most interesting of which is a timeout + // BUT the edits all go through (See HBASE-3872). IF we reach the PONR + // then subsequent failures need to crash out this regionserver; the + // server shutdown processing should be able to fix-up the incomplete split. + // The offlined parent will have the daughters as extra columns. If + // we leave the daughter regions in place and do not remove them when we + // crash out, then they will have their references to the parent in place + // still and the server shutdown fixup of .META. will point to these + // regions. + // We add the PONR JournalEntry before offlineParentInMeta, so that even + // if offlineParentInMeta times out, the regionserver will exit and the + // master ServerShutdownHandler will fix up the daughters and avoid data + // loss (see HBASE-4562). + this.journal.add(JournalEntry.PONR); + // Edit parent in meta. Offlines parent region and adds splita and splitb. if (!testing) { MetaEditor.offlineParentInMeta(server.getCatalogTracker(), this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo()); } - // This is the point of no return. Adding subsequent edits to .META. as we - // do below when we do the daugther opens adding each to .META. 
can fail in - // various interesting ways the most interesting of which is a timeout - // BUT the edits all go through (See HBASE-3872). IF we reach the POWR - // then subsequent failures need to crash out this regionserver; the - // server shutdown processing should be able to fix-up the incomplete split. - this.journal.add(JournalEntry.PONR); // Open daughters in parallel. DaughterOpener aOpener = new DaughterOpener(server, services, a); DaughterOpener bOpener = new DaughterOpener(server, services, b);