Index: src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestTimestamp.java =================================================================== --- src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestTimestamp.java (revision 611651) +++ src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestTimestamp.java (working copy) @@ -105,7 +105,8 @@ * @throws IOException */ public void testTimestamps() throws IOException { - final MiniHBaseCluster cluster = new MiniHBaseCluster(this.conf, 1); + final MiniHBaseCluster cluster = + new MiniHBaseCluster(this.conf, 1, this.cluster, true); try { HTable t = createTable(); Incommon incommon = new HTableIncommon(t); Index: src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java =================================================================== --- src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java (revision 611651) +++ src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java (working copy) @@ -1091,13 +1091,13 @@ } /** Add to the outbound message buffer */ - private void reportOpen(HRegion region) { - outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_OPEN, region.getRegionInfo())); + private void reportOpen(HRegionInfo region) { + outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_OPEN, region)); } /** Add to the outbound message buffer */ - private void reportClose(HRegion region) { - outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_CLOSE, region.getRegionInfo())); + private void reportClose(HRegionInfo region) { + outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_CLOSE, region)); } /** @@ -1222,7 +1222,14 @@ } catch (IOException e) { LOG.error("error opening region " + regionInfo.getRegionName(), e); - reportClose(region); + + // Mark the region offline. + // TODO: add an extra field in HRegionInfo to indicate that there is + // an error. We can't do that now because that would be an incompatible + // change that would require a migration + + regionInfo.setOffline(true); + reportClose(regionInfo); return; } this.lock.writeLock().lock(); @@ -1232,7 +1239,7 @@ } finally { this.lock.writeLock().unlock(); } - reportOpen(region); + reportOpen(regionInfo); } } @@ -1249,7 +1256,7 @@ if(region != null) { region.close(); if(reportWhenCompleted) { - reportClose(region); + reportClose(hri); } } } @@ -1269,7 +1276,7 @@ LOG.debug("closing region " + region.getRegionName()); } try { - region.close(abortRequested); + region.close(abortRequested, null); } catch (IOException e) { LOG.error("error closing region " + region.getRegionName(), RemoteExceptionHandler.checkIOException(e)); @@ -1303,7 +1310,7 @@ LOG.debug("closing region " + region.getRegionName()); } try { - region.close(false); + region.close(); } catch (IOException e) { LOG.error("error closing region " + region.getRegionName(), RemoteExceptionHandler.checkIOException(e)); Index: src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java =================================================================== --- src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (revision 611651) +++ src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (working copy) @@ -464,10 +464,10 @@ ) ) { - // The current assignment is no good + // The current assignment is invalid if (LOG.isDebugEnabled()) { LOG.debug("Current assignment of " + info.getRegionName() + - " is no good: storedInfo: " + storedInfo + ", startCode: " + + " is not valid: storedInfo: " + storedInfo + ", startCode: " + startCode + ", storedInfo.startCode: " + ((storedInfo != null)? storedInfo.getStartCode(): -1) + ", unassignedRegions: " + unassignedRegions.containsKey(info) + @@ -963,7 +963,9 @@ */ void unassignRootRegion() { this.rootRegionLocation.set(null); - this.unassignedRegions.put(HRegionInfo.rootRegionInfo, ZERO_L); + if (!this.shutdownRequested) { + this.unassignedRegions.put(HRegionInfo.rootRegionInfo, ZERO_L); + } } /** @@ -1622,10 +1624,15 @@ // Root region + if (region.isOffline()) { + // Can't proceed without root region. Shutdown. + LOG.fatal("root region is marked offline"); + shutdown(); + } unassignRootRegion(); } else { - boolean reassignRegion = true; + boolean reassignRegion = !region.isOffline(); boolean deleteRegion = false; if (killedRegions.remove(region.getRegionName())) { Index: src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java =================================================================== --- src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (revision 611651) +++ src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (working copy) @@ -353,7 +353,7 @@ * @throws IOException */ public List close() throws IOException { - return close(false); + return close(false, null); } /** @@ -364,13 +364,15 @@ * time-sensitive thread. * * @param abort true if server is aborting (only during testing) + * @param listener call back to alert caller on close status * @return Vector of all the storage files that the HRegion's component * HStores make use of. It's a list of HStoreFile objects. Can be null if * we are not to close at this time or we are already closed. * * @throws IOException */ - List close(boolean abort) throws IOException { + List close(boolean abort, + final RegionUnavailableListener listener) throws IOException { if (isClosed()) { LOG.info("region " + this.regionInfo.getRegionName() + " already closed"); return null; @@ -410,6 +412,13 @@ // outstanding updates. waitOnRowLocks(); + if (listener != null) { + // If there is a listener, let them know that we have now + // acquired all the necessary locks and are starting to + // do the close + listener.closing(getRegionName()); + } + // Don't flush the cache if we are aborting if (!abort) { internalFlushcache(snapshotMemcaches()); @@ -420,6 +429,13 @@ result.addAll(store.close()); } this.closed.set(true); + + if (listener != null) { + // If there is a listener, tell them that the region is now + // closed. + listener.closed(getRegionName()); + } + LOG.info("closed " + this.regionInfo.getRegionName()); return result; } finally { @@ -553,17 +569,10 @@ throw new IOException("Cannot split; target file collision at " + dirB); } - // Notify the caller that we are about to close the region. This moves - // us to the 'retiring' queue. Means no more updates coming in -- just - // whatever is outstanding. - if (listener != null) { - listener.closing(getRegionName()); - } - // Now close the HRegion. Close returns all store files or null if not // supposed to close (? What to do in this case? Implement abort of close?) // Close also does wait on outstanding rows and calls a flush just-in-case. - List hstoreFilesToSplit = close(); + List hstoreFilesToSplit = close(false, listener); if (hstoreFilesToSplit == null) { LOG.warn("Close came back null (Implement abort of close?)"); throw new RuntimeException("close returned empty vector of HStoreFiles");