Index: src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (revision 12813) +++ src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (working copy) @@ -3,7 +3,7 @@ * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information + * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance @@ -19,6 +19,8 @@ */ package org.apache.hadoop.hbase; +import java.io.BufferedWriter; +import java.io.FileWriter; import java.io.IOException; import java.util.List; import java.util.ArrayList; @@ -36,9 +38,12 @@ /** * Test whether region rebalancing works. (HBASE-71) + * Also tests HBASE-3663: whether region rebalancing works after a new server has booted, + * especially when no server has more regions than the ceiling of the average load. */ public class TestRegionRebalancing extends HBaseClusterTestCase { final Log LOG = LogFactory.getLog(this.getClass().getName()); + HTable table; HTableDescriptor desc; @@ -67,14 +72,14 @@ // create a 20-region table by writing directly to disk List startKeys = new ArrayList(); startKeys.add(null); - for (int i = 10; i < 29; i++) { + for (int i = 10; i < 70; i++) { startKeys.add(Bytes.toBytes("row_" + i)); } startKeys.add(null); - LOG.info(startKeys.size() + " start keys generated"); - + LOG.debug(startKeys.size() + " start keys generated"); + List regions = new ArrayList(); - for (int i = 0; i < 20; i++) { + for (int i = 0; i < 60; i++) { regions.add(createAregion(startKeys.get(i), startKeys.get(i+1))); } @@ -89,46 +94,37 @@ } /** - * For HBASE-71. 
Try a few different configurations of starting and stopping - * region servers to see if the assignment or regions is pretty balanced. - * @throws IOException + * In this case, 16 region servers are started here, so there will be 17 servers and 62 regions in total. + * When one of the servers shuts down, the average load is 3.875. + * When this server comes back, the average load will be about 3.65. + * Set the slop near 0, so no server's load will be larger than 4. + * The load balancing algorithm should handle this case properly. */ public void testRebalancing() throws IOException { - table = new HTable(conf, "test"); - assertEquals("Test table should have 20 regions", - 20, table.getStartKeys().length); - // verify that the region assignments are balanced to start out - assertRegionsAreBalanced(); - - LOG.debug("Adding 2nd region server."); - // add a region server - total of 2 - cluster.startRegionServer(); - assertRegionsAreBalanced(); - - // add a region server - total of 3 - LOG.debug("Adding 3rd region server."); - cluster.startRegionServer(); - assertRegionsAreBalanced(); - - // kill a region server - total of 2 - LOG.debug("Killing the 3rd region server."); + for (int i = 1; i <= 16; i++){ + LOG.debug("Adding region server #"+i); + cluster.startRegionServer(); + checkingServerStatus(); + } + + LOG.debug("Restart: killing 1 region server."); cluster.stopRegionServer(2, false); cluster.waitOnRegionServer(2); assertRegionsAreBalanced(); - - // start two more region servers - total of 4 - LOG.debug("Adding 3rd region server"); + + LOG.debug("Restart: adding that region server back"); cluster.startRegionServer(); - LOG.debug("Adding 4th region server"); - cluster.startRegionServer(); assertRegionsAreBalanced(); - - for (int i = 0; i < 6; i++){ - LOG.debug("Adding " + (i + 5) + "th region server"); - cluster.startRegionServer(); + } + + private void checkingServerStatus() { + List servers = getOnlineRegionServers(); + double avg = cluster.getMaster().getAverageLoad(); + for (HRegionServer 
server : servers) { + int serverLoad = server.getOnlineRegions().size(); + LOG.debug(server.hashCode() + " Avg: " + avg + " actual: " + serverLoad); } - assertRegionsAreBalanced(); } /** figure out how many regions are currently being served. */ @@ -149,7 +145,7 @@ boolean success = false; float slop = conf.getFloat("hbase.regions.slop", (float)0.1); if (slop <= 0) slop = 1; - + for (int i = 0; i < 5; i++) { success = true; // make sure all the regions are reassigned before we test balance @@ -160,17 +156,18 @@ double avg = cluster.getMaster().getAverageLoad(); int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop)); int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1; + LOG.debug("There are " + servers.size() + " servers and " + regionCount + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop + ", up border: " + avgLoadPlusSlop + "; attempt: " + i); - + for (HRegionServer server : servers) { - int serverLoad = server.getOnlineRegions().size(); - LOG.debug(server.hashCode() + " Avg: " + avg + " actual: " + serverLoad); + int serverLoad = server.getOnlineRegions().size(); + LOG.debug(server.hashCode() + " Avg: " + avg + " actual: " + serverLoad); if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop - && serverLoad >= avgLoadMinusSlop)) { + && serverLoad >= avgLoadMinusSlop)) { LOG.debug(server.hashCode() + " Isn't balanced!!! Avg: " + avg + - " actual: " + serverLoad + " slop: " + slop); + " actual: " + serverLoad + " slop: " + slop); success = false; } } @@ -189,7 +186,7 @@ return; } // if we get here, we tried 5 times and never got to short circuit out of - // the retry loop, so this is a failure. + // the retry loop, so this is a failure. fail("After 5 attempts, region assignments were not balanced."); } @@ -207,9 +204,8 @@ * Wait until all the regions are assigned. 
*/ private void waitForAllRegionsAssigned() { - while (getRegionCount() < 22) { - // while (!cluster.getMaster().allRegionsAssigned()) { - LOG.debug("Waiting for there to be 22 regions, but there are " + getRegionCount() + " right now."); + while (getRegionCount() < 62) { + LOG.debug("Waiting for there to be 62 regions, but there are " + getRegionCount() + " right now."); try { Thread.sleep(1000); } catch (InterruptedException e) {} @@ -223,7 +219,7 @@ private HRegion createAregion(byte [] startKey, byte [] endKey) throws IOException { HRegion region = createNewHRegion(desc, startKey, endKey); - byte [] keyToWrite = startKey == null ? Bytes.toBytes("row_000") : startKey; + byte [] keyToWrite = startKey == null ? Bytes.toBytes("row_0000") : startKey; Put put = new Put(keyToWrite); put.add(FAMILY_NAME, null, Bytes.toBytes("test")); region.put(put); Index: src/main/java/org/apache/hadoop/hbase/master/RegionManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (revision 12813) +++ src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (working copy) @@ -1381,7 +1381,7 @@ double avg = master.getAverageLoad(); // nothing to balance if server load not more then average load - if(servLoad.getLoad() <= Math.ceil(avg) || avg <= 2.0) { + if(servLoad.getLoad() <= Math.floor(avg) || avg <= 2.0) { return; } @@ -1447,12 +1447,12 @@ return 0; // there is no low loaded servers int lowSrvCount = loadToServers.get(loadToServers.firstKey()).size(); - int numRegionsToClose = 0; - int numSrvRegs = srvLoad.getNumberOfRegions(); int numMoveToLowLoaded = (avgLoadMinusSlop - lowestLoad) * lowSrvCount; - numRegionsToClose = numSrvRegs - (int)Math.ceil(avgLoad); + + int numRegionsToClose = numSrvRegs - (int)Math.floor(avgLoad); numRegionsToClose = Math.min(numRegionsToClose, numMoveToLowLoaded); + if (LOG.isDebugEnabled()) { LOG.debug("Server(s) are carrying only " + lowestLoad + " regions. 
" + "Server " + srvName + " is most loaded (" + numSrvRegs +