# Patch: adds a --maxthreads option to graceful_stop.sh, region_mover.rb and rolling-restart.sh
# so that region unload/reload can move several regions concurrently (default: 1 thread).
diff -ur hbase-0.94.6.1.3.0.0-109.el6/usr/lib/hbase/bin/graceful_stop.sh /usr/lib/hbase/bin/graceful_stop.sh --- hbase-0.94.6.1.3.0.0-109.el6/usr/lib/hbase/bin/graceful_stop.sh 2013-05-25 06:47:05.000000000 +0000 +++ /usr/lib/hbase/bin/graceful_stop.sh 2013-07-09 23:41:59.364355539 +0000 @@ -23,13 +23,14 @@ # Move regions off a server then stop it. Optionally restart and reload. # Turn off the balancer before running this script. function usage { - echo "Usage: graceful_stop.sh [--config ] [--restart [--reload]] [--thrift] [--rest] " - echo " thrift If we should stop/start thrift before/after the hbase stop/start" - echo " rest If we should stop/start rest before/after the hbase stop/start" - echo " restart If we should restart after graceful stop" - echo " reload Move offloaded regions back on to the restarted server" - echo " debug Print helpful debug information" - echo " hostname Hostname of server we are to stop" + echo "Usage: graceful_stop.sh [--config ] [--restart [--reload]] [--thrift] [--rest] [--maxthreads xx] " + echo " thrift If we should stop/start thrift before/after the hbase stop/start" + echo " rest If we should stop/start rest before/after the hbase stop/start" + echo " restart If we should restart after graceful stop" + echo " reload Move offloaded regions back on to the restarted server" + echo " debug Print helpful debug information" + echo " maxthreads xx Limit the number of threads used by the region mover. Default value is 1." + echo " hostname Hostname of server we are to stop" exit 1 } @@ -47,6 +48,7 @@ debug= thrift= rest= +maxthreads=1 while [ $# -gt 0 ] do case "$1" in @@ -55,6 +57,7 @@ --restart) restart=true; shift;; --reload) reload=true; shift;; --debug) debug="--debug"; shift;; + --maxthreads) shift; maxthreads=$1; shift;; --) shift; break;; -*) usage ;; *) break;; # terminate while loop @@ -72,7 +75,7 @@ echo "Disabling balancer!" 
echo 'balance_switch false' | "$bin"/hbase --config ${HBASE_CONF_DIR} shell echo "Unloading $hostname region(s)" -HBASE_NOEXEC=true "$bin"/hbase --config ${HBASE_CONF_DIR} org.jruby.Main "$bin"/region_mover.rb --file=$filename $debug unload $hostname +HBASE_NOEXEC=true "$bin"/hbase --config ${HBASE_CONF_DIR} org.jruby.Main "$bin"/region_mover.rb --file=$filename $debug --maxthreads=$maxthreads unload $hostname echo "Unloaded $hostname region(s)" # Stop the server. Have to put hostname into its own little file for hbase-daemons.sh hosts="/tmp/$(basename $0).$$.tmp" @@ -95,7 +98,7 @@ fi if [ "$reload" != "" ]; then echo "Reloading $hostname region(s)" - HBASE_NOEXEC=true "$bin"/hbase --config ${HBASE_CONF_DIR} org.jruby.Main "$bin"/region_mover.rb --file=$filename $debug load $hostname + HBASE_NOEXEC=true "$bin"/hbase --config ${HBASE_CONF_DIR} org.jruby.Main "$bin"/region_mover.rb --file=$filename $debug --maxthreads=$maxthreads load $hostname echo "Reloaded $hostname region(s)" fi fi Only in /usr/lib/hbase/bin: graceful_stop.sh.orig Only in /usr/lib/hbase/bin: graceful_stop.sh.rej diff -ur hbase-0.94.6.1.3.0.0-109.el6/usr/lib/hbase/bin/region_mover.rb /usr/lib/hbase/bin/region_mover.rb --- hbase-0.94.6.1.3.0.0-109.el6/usr/lib/hbase/bin/region_mover.rb 2013-05-25 06:47:05.000000000 +0000 +++ /usr/lib/hbase/bin/region_mover.rb 2013-07-10 00:57:29.687355166 +0000 @@ -24,6 +24,7 @@ # Does not work for case of multiple regionservers all running on the # one node. require 'optparse' + include Java import org.apache.hadoop.hbase.HConstants import org.apache.hadoop.hbase.HBaseConfiguration @@ -329,18 +330,32 @@ break if rs.length == 0 count = 0 $LOG.info("Moving " + rs.length.to_s + " region(s) from " + servername + - " during this cycle"); - for r in rs - # Get a random server to move the region to. 
- server = servers[rand(servers.length)] - $LOG.info("Moving region " + r.getEncodedName() + " (" + count.to_s + - " of " + rs.length.to_s + ") to server=" + server); - count = count + 1 - # Assert we can scan region in its current location - isSuccessfulScan(admin, r) - # Now move it. - move(admin, r, server, servername) - movedRegions.add(r) + " with " + options[:maxthreads].to_s + " threads during this cycle"); + counter = 0 + while counter < rs.length do + server_index = 0 + threads = [] + while server_index < servers.length && counter < rs.length && server_index < options[:maxthreads] do + count += 1 + destination_server = server_index + # If we have less threads than servers, then choose servers randomly to not always pickup + # the same first x servers. + if options[:maxthreads] < servers.length + destination_server = rand(servers.length) + end + $LOG.info("Moving region " + rs[counter].getEncodedName() + " (" + count.to_s + + " of " + rs.length.to_s + ") to server=" + servers[destination_server]); + threads << Thread.new(rs[counter], servers[destination_server]) { |region,server| + # Assert we can scan region in its current location + isSuccessfulScan(admin, region) + # Now move it. 
+ move(admin, region, server, servername) + movedRegions.add(region) + } + server_index += 1 + counter += 1 + end + threads.each { |aThread| aThread.join } end end if movedRegions.size() > 0 @@ -375,25 +390,36 @@ end $LOG.info("Moving " + regions.size().to_s + " regions to " + servername) count = 0 - for r in regions - exists = false - begin - isSuccessfulScan(admin, r) - exists = true - rescue org.apache.hadoop.hbase.NotServingRegionException => e - $LOG.info("Failed scan of " + e.message) - end - count = count + 1 - next unless exists - currentServer = getServerNameForRegion(admin, r) - if currentServer and currentServer == servername - $LOG.info("Region " + r.getRegionNameAsString() + " (" + count.to_s + - " of " + regions.length.to_s + ") already on target server=" + servername) - next + counter = 0 + while counter < regions.length do + thread_index = 0 + threads = [] + while thread_index < 10 && counter < regions.length && thread_index < options[:maxthreads] do + r = regions[counter] + counter = counter + 1 + exists = false + begin + isSuccessfulScan(admin, r) + exists = true + rescue org.apache.hadoop.hbase.NotServingRegionException => e + $LOG.info("Failed scan of " + e.message) + end + count = count + 1 + next unless exists + currentServer = getServerNameForRegion(admin, r) + if currentServer and currentServer == servername + $LOG.info("Region " + r.getRegionNameAsString() + " (" + count.to_s + + " of " + regions.length.to_s + ") already on target server=" + servername) + next + end + $LOG.info("Moving region " + r.getEncodedName() + " (" + count.to_s + + " of " + regions.length.to_s + ") to server=" + servername); + threads << Thread.new(r, currentServer) { |region, source| + move(admin, region, servername, source) + } + thread_index += 1 end - $LOG.info("Moving region " + r.getEncodedName() + " (" + count.to_s + - " of " + regions.length.to_s + ") to server=" + servername); - move(admin, r, servername, currentServer) + threads.each { |aThread| aThread.join } end end @@ 
-435,6 +461,7 @@ opts.banner = "Usage: #{NAME}.rb [options] load|unload " opts.separator 'Load or unload regions by moving one at a time' options[:file] = nil + options[:maxthreads] = 1 opts.on('-f', '--filename=FILE', 'File to save regions list into unloading, or read from loading; default /tmp/') do |file| options[:file] = file end @@ -449,6 +476,9 @@ opts.on('-x', '--excludefile=FILE', 'File with hosts-per-line to exclude as unload targets; default excludes only target host; useful for rack decommisioning.') do |file| options[:excludesFile] = file end + opts.on('-m', '--maxthreads=XX', 'Define the maximum number of threads to use to unload and reload the regions') do |number| + options[:maxthreads] = [number.to_i, 1].max + end end optparse.parse! @@ -464,6 +494,7 @@ end # Create a logger and save it to ruby global $LOG = configureLogging(options) + case ARGV[0] when 'load' loadRegions(options, hostname) Only in /usr/lib/hbase/bin: region_mover.rb.rej diff -ur hbase-0.94.6.1.3.0.0-109.el6/usr/lib/hbase/bin/rolling-restart.sh /usr/lib/hbase/bin/rolling-restart.sh --- hbase-0.94.6.1.3.0.0-109.el6/usr/lib/hbase/bin/rolling-restart.sh 2013-05-25 06:47:05.000000000 +0000 +++ /usr/lib/hbase/bin/rolling-restart.sh 2013-07-10 00:19:16.201468135 +0000 @@ -34,7 +34,7 @@ # # Modelled after $HADOOP_HOME/bin/slaves.sh. 
-usage="Usage: $0 [--config ] [--rs-only] [--master-only] [--graceful]" +usage="Usage: $0 [--config ] [--rs-only] [--master-only] [--graceful] [--maxthreads xx]" bin=`dirname "$0"` bin=`cd "$bin">/dev/null; pwd` @@ -57,23 +57,33 @@ RR_RS=1 RR_MASTER=1 RR_GRACEFUL=0 +RR_MAXTHREADS=1 -for x in "$@" ; do - case "$x" in +while [ $# -gt 0 ] +do + case "$1" in --rs-only|-r) RR_RS=1 RR_MASTER=0 RR_GRACEFUL=0 + shift ;; --master-only) RR_RS=0 RR_MASTER=1 RR_GRACEFUL=0 + shift ;; --graceful) RR_RS=0 RR_MASTER=0 RR_GRACEFUL=1 + shift + ;; + --maxthreads) + shift + RR_MAXTHREADS=$1 + shift ;; *) - echo Bad argument: $x + echo Bad argument: $1 @@ -155,7 +165,7 @@ rs_parts=(${rs//,/ }) hostname=${rs_parts[0]} echo "Gracefully restarting: $hostname" - "$bin"/graceful_stop.sh --config "${HBASE_CONF_DIR}" --restart --reload --debug "$hostname" + "$bin"/graceful_stop.sh --config "${HBASE_CONF_DIR}" --restart --reload --debug --maxthreads "${RR_MAXTHREADS}" "$hostname" sleep 1 done fi