Index: src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java (revision 1508318)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java (working copy)
@@ -48,6 +48,7 @@
 import org.apache.hadoop.hbase.MasterNotRunningException;
 import org.apache.hadoop.hbase.MediumTests;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.Delete;
@@ -547,7 +548,7 @@
   /**
    * Test that we could recover the data correctly after aborting flush. In the
    * test, first we abort flush after writing some data, then writing more data
-   * and flush again, at last verify the data.
+   * and flush again, and finally verify the flush result.
    * @throws IOException
    */
   @Test
@@ -618,39 +619,19 @@
       Mockito.doReturn(true).when(rsServices).isAborted();
     }
     // writing more data
-    int moreRow = 10;
-    for (int i = writtenRowCount; i < writtenRowCount + moreRow; i++) {
-      Put put = new Put(Bytes.toBytes(tableNameStr + Integer.toString(i)));
-      put.add(families.get(i % families.size()).getName(), Bytes.toBytes("q"),
-          Bytes.toBytes("val"));
-      region.put(put);
-    }
-    writtenRowCount += moreRow;
-    // call flush again
-    throwExceptionWhenFlushing.set(false);
     try {
-      region.flushcache();
-    } catch (IOException t) {
-      LOG.info("Expected exception when flushing region because server is stopped,"
-          + t.getMessage());
+      int moreRow = 10;
+      for (int i = writtenRowCount; i < writtenRowCount + moreRow; i++) {
+        Put put = new Put(Bytes.toBytes(tableNameStr + Integer.toString(i)));
+        put.add(families.get(i % families.size()).getName(), Bytes.toBytes("q"),
+            Bytes.toBytes("val"));
+        region.put(put);
+      }
+      fail("No exception thrown.");
+    } catch (Exception ex) {
+      assertTrue(ex instanceof NotServingRegionException);
+      assertTrue(ex.getMessage().contains("is closing"));
     }
-
-    region.close(true);
-    wal.close();
-
-    // Let us try to split and recover
-    runWALSplit(this.conf);
-    HLog wal2 = createWAL(this.conf);
-    Mockito.doReturn(false).when(rsServices).isAborted();
-    HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd,
-        rsServices);
-    long seqid2 = region2.initialize();
-    // HRegionServer usually does this. It knows the largest seqid across all
-    // regions.
-    wal2.setSequenceNumber(seqid2);
-
-    scanner = region2.getScanner(new Scan());
-    assertEquals(writtenRowCount, getScannedCount(scanner));
   }
 
   private int getScannedCount(RegionScanner scanner) throws IOException {
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 1508318)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy)
@@ -1437,7 +1437,7 @@
       dataInMemoryWithoutWAL.set(0);
     }
     synchronized (writestate) {
-      if (!writestate.flushing && writestate.writesEnabled) {
+      if (!writestate.flushing && writestate.writesEnabled && !this.closing.get()) {
         this.writestate.flushing = true;
       } else {
         if (LOG.isDebugEnabled()) {
@@ -1670,6 +1670,8 @@
           Bytes.toStringBinary(getRegionName()));
       dse.initCause(t);
       status.abort("Flush failed: " + StringUtils.stringifyException(t));
+      // we'll abort this server soon; set the closing flag now to avoid potential data loss
+      this.closing.set(true);
       throw dse;
     }
 
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1508318)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy)
@@ -65,6 +65,7 @@
+import org.apache.hadoop.hbase.DroppedSnapshotException;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HServerAddress;
@@ -3258,9 +3259,17 @@
     checkOpen();
     LOG.info("Flushing " + regionInfo.getRegionNameAsString());
     HRegion region = getRegion(regionInfo.getRegionName());
-    boolean needsCompaction = region.flushcache();
-    if (needsCompaction) {
-      this.compactSplitThread.requestCompaction(region, "Compaction through user triggered flush");
+    try {
+      boolean needsCompaction = region.flushcache();
+      if (needsCompaction) {
+        this.compactSplitThread.requestCompaction(region, "Compaction through user triggered flush");
+      }
+    } catch (DroppedSnapshotException ex) {
+      abort("Memstore flush failed; aborting region server.", ex);
+      NotServingRegionException nsre = new NotServingRegionException(
+          "Aborting due to memstore flush failure");
+      nsre.initCause(ex);
+      throw nsre;
     }
   }
 
@@ -3276,7 +3285,15 @@
       throws NotServingRegionException, IOException {
     checkOpen();
     HRegion region = getRegion(regionInfo.getRegionName());
-    region.flushcache();
+    try {
+      region.flushcache();
+    } catch (DroppedSnapshotException ex) {
+      abort("Memstore flush failed; aborting region server.", ex);
+      NotServingRegionException nsre = new NotServingRegionException(
+          "Aborting due to memstore flush failure");
+      nsre.initCause(ex);
+      throw nsre;
+    }
     region.forceSplit(splitPoint);
     compactSplitThread.requestSplit(region, region.checkSplit());
   }
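
Taken together, the three changes implement one failure-handling pattern: a flush that fails with DroppedSnapshotException marks the region as closing, the region server aborts, and callers get NotServingRegionException instead of continuing to write into a region whose memstore state is suspect. Below is a minimal, self-contained Java sketch of that pattern, runnable without HBase; ToyRegion, FlushFailedException, and RegionClosingException are hypothetical stand-ins for HRegion, DroppedSnapshotException, and NotServingRegionException, not real HBase APIs.

import java.util.concurrent.atomic.AtomicBoolean;

// Hypothetical stand-in for DroppedSnapshotException.
class FlushFailedException extends Exception {
  FlushFailedException(String msg, Throwable cause) { super(msg, cause); }
}

// Hypothetical stand-in for NotServingRegionException.
class RegionClosingException extends Exception {
  RegionClosingException(String msg) { super(msg); }
}

// Toy model of the guard logic the patch adds to HRegion.
class ToyRegion {
  private final AtomicBoolean closing = new AtomicBoolean(false);

  void flush() throws FlushFailedException {
    if (closing.get()) {
      return; // like the patched guard: a closing region refuses a new flush
    }
    try {
      doFlush();
    } catch (RuntimeException e) {
      // like the added "this.closing.set(true)" before "throw dse":
      // poison the region so no further writes land before the abort
      closing.set(true);
      throw new FlushFailedException("flush failed", e);
    }
  }

  // Like HRegion's existing closing check on writes: fail fast once closing is set.
  void put(String row) throws RegionClosingException {
    if (closing.get()) {
      throw new RegionClosingException("region is closing");
    }
    // ... apply the write ...
  }

  private void doFlush() {
    throw new IllegalStateException("simulated flush failure");
  }
}

public class FlushAbortSketch {
  public static void main(String[] args) {
    ToyRegion region = new ToyRegion();
    try {
      region.flush();
    } catch (FlushFailedException e) {
      // In the patched HRegionServer this is where abort(...) is called and
      // the failure is rethrown as NotServingRegionException.
      System.out.println("server would abort: " + e.getMessage());
    }
    try {
      region.put("row1");
    } catch (RegionClosingException e) {
      System.out.println("caller sees: " + e.getMessage()); // fail-fast write
    }
  }
}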