Index: src/java/org/apache/hadoop/hbase/HServerLoad.java =================================================================== --- src/java/org/apache/hadoop/hbase/HServerLoad.java (revision 721904) +++ src/java/org/apache/hadoop/hbase/HServerLoad.java (working copy) @@ -288,13 +288,24 @@ @Override public boolean equals(Object o) { - return compareTo(o) == 0; + if (!(o instanceof HServerLoad)) { + return false; + } + HServerLoad other = (HServerLoad)o; + // traditional loading + if ((this.numberOfRegions != other.numberOfRegions) || + (this.numberOfRequests != other.numberOfRequests)) { + return false; + } + // also now heap sensitive + return this.usedHeapMB == other.usedHeapMB; } @Override public int hashCode() { int result = Integer.valueOf(numberOfRequests).hashCode(); result ^= Integer.valueOf(numberOfRegions).hashCode(); + result ^= Integer.valueOf(usedHeapMB).hashCode(); return result; } @@ -365,6 +376,13 @@ } /** + * @returns the amount of heap in use, in MB + */ + public int getUsedHeapMB() { + return usedHeapMB; + } + + /** * @param usedHeapMB the amount of heap in use, in MB */ public void setUsedHeapMB(int usedHeapMB) { @@ -372,6 +390,13 @@ } /** + * @returns the maximum allowable heap size, in MB + */ + public int getMaxHeapMB() { + return maxHeapMB; + } + + /** * @param maxHeapMB the maximum allowable heap size, in MB */ public void setMaxHeapMB(int maxHeapMB) { @@ -421,6 +446,26 @@ public int compareTo(Object o) { HServerLoad other = (HServerLoad) o; - return this.getLoad() - other.getLoad(); + // Be load and heap sensitive at the same time. + if (this.numberOfRegions < other.numberOfRegions) { + // If the number of regions is less, but heap user is greater, consider + // the load higher. Otherwise, lower. + if (this.usedHeapMB > other.usedHeapMB) { + return 1; + } else { + return -1; + } + } else if (this.numberOfRegions > other.numberOfRegions) { + // If the number of regions is greater, the load is greater always. + return 1; + } + // The number of regions is equal, so make a judgement based only on heap + // usage. + if (this.usedHeapMB < other.usedHeapMB) { + return -1; + } else if (this.usedHeapMB > other.usedHeapMB) { + return 1; + } + return 0; } } Index: src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 721904) +++ src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -22,6 +22,7 @@ import java.io.IOException; import java.lang.Thread.UncaughtExceptionHandler; import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; import java.lang.management.MemoryUsage; import java.lang.reflect.Constructor; import java.net.InetSocketAddress; @@ -106,7 +107,8 @@ */ public class HRegionServer implements HConstants, HRegionInterface, Runnable { static final Log LOG = LogFactory.getLog(HRegionServer.class); - + static final int FREE_HEAP_THRESHOLD = 20; // MB + // Set when a report to the master comes back with a message asking us to // shutdown. Also set by call to stop when debugging or running unit tests // of HRegionServer in isolation. We use AtomicBoolean rather than @@ -309,13 +311,48 @@ haveRootRegion.set(true); } } - long now = System.currentTimeMillis(); + long now = System.currentTimeMillis(); if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) { // It has been way too long since we last reported to the master. LOG.warn("unable to report to master for " + (now - lastMsg) + " milliseconds - retrying"); } if ((now - lastMsg) >= msgInterval) { + // interrogate memory MXBean for heap usage + MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); + MemoryUsage memory = memoryMXBean.getHeapMemoryUsage(); + int heapUsedMB = (int)(memory.getUsed()/1024/1024); + int heapTotalMB = (int)(memory.getMax()/1024/1024); + if ((heapTotalMB - heapUsedMB) < FREE_HEAP_THRESHOLD) { + // force garbage collection + System.gc(); + // check again + memory = memoryMXBean.getHeapMemoryUsage(); + heapUsedMB = (int)(memory.getUsed()/1024/1024); + heapTotalMB = (int)(memory.getMax()/1024/1024); + if ((heapTotalMB - heapUsedMB) < FREE_HEAP_THRESHOLD) { + // still underwater, jettison meta regions + for (HRegion r: onlineRegions.values()) { + HRegionInfo hri = r.regionInfo; + if (hri.isRootRegion() || hri.isMetaRegion()) { + // Add some hysterisis here (2 minutes) in case all + // regionservers might be under heap stress. Without it in + // that case all regionservers would immediately close meta + // regions reassigned to them, a cluster wide denial of + // service while the master tries in vain to find a home + // for them. (Of course a cluster where all region servers + // are under heap stress has other problems...) + long opened = r.getOpenedTime(); + if ((opened == 0) || ((now - opened) < 120000)) { + continue; + } + LOG.warn("closing meta region '" + + hri.getRegionNameAsString() + "' due to heap stress"); + closeRegion(hri, true); + } + } + } + } HMsg outboundArray[] = null; synchronized(this.outboundMsgs) { outboundArray = @@ -324,11 +361,8 @@ } try { doMetrics(); - MemoryUsage memory = - ManagementFactory.getMemoryMXBean().getHeapMemoryUsage(); - HServerLoad hsl = new HServerLoad(requestCount.get(), - (int)(memory.getUsed()/1024/1024), - (int)(memory.getMax()/1024/1024)); + HServerLoad hsl = new HServerLoad(requestCount.get(), heapUsedMB, + heapTotalMB); for (HRegion r: onlineRegions.values()) { byte[] name = r.getRegionName(); int stores = 0; Index: src/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 721904) +++ src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -191,6 +191,8 @@ private long minSequenceId; final AtomicInteger activeScannerCount = new AtomicInteger(0); + long openedTime; + ////////////////////////////////////////////////////////////////////////////// // Constructor ////////////////////////////////////////////////////////////////////////////// @@ -311,12 +313,19 @@ // HRegion is ready to go! this.writestate.compacting = false; - this.lastFlushTime = System.currentTimeMillis(); + this.lastFlushTime = this.openedTime = System.currentTimeMillis(); LOG.info("region " + this + "/" + this.regionInfo.getEncodedName() + " available"); } /** + * @return the time the region was opened, or 0 if the region is not yet open + */ + public long getOpenedTime() { + return openedTime; + } + + /** * @return Updates to this region need to have a sequence id that is >= to * the this number. */