diff --git a/bin/decommission.rb b/bin/decommission.rb
new file mode 100755
index 0000000..0751c0b
--- /dev/null
+++ b/bin/decommission.rb
@@ -0,0 +1,206 @@
+# Copyright 2011 The Apache Software Foundation
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Decommission a server. Move regions off a server one-by-one.
+# Assumes balancer has been disabled.
+include Java
+import org.apache.hadoop.hbase.HConstants
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.client.HBaseAdmin
+import org.apache.hadoop.hbase.client.Get
+import org.apache.hadoop.hbase.client.HTable
+import org.apache.hadoop.hbase.client.HConnectionManager
+import org.apache.hadoop.hbase.HServerAddress
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.conf.Configuration
+import org.apache.commons.logging.Log
+import org.apache.commons.logging.LogFactory
+
+# Name of this script
+NAME = "decommission"
+
+# Print usage for this script to stdout, then exit with status 1.
+def usage
+  print <<-EOF
+Usage: #{NAME}.rb <servername>
+  servername  Servername is hostname, port, and startcode delimited by a comma
+              For example: a.example.com,60020,121211121112121212
+  EOF
+  exit 1
+end
+
+# Move region +r+ to +newServer+, then check that it is hosted elsewhere.
+#
+# config       - client Configuration used to open tables
+# admin        - HBaseAdmin used to request the move
+# r            - region to move (getEncodedName, getTableDesc, getStartKey and
+#                getRegionName are called on it)
+# newServer    - servername of the destination
+# decommission - servername of the server being emptied
+#
+# Raises IOError if the verifying Get returns nothing, and RuntimeError if the
+# server recorded in the catalog table still equals +decommission+.
+def move(config, admin, r, newServer, decommission)
+  admin.move(Bytes.toBytes(r.getEncodedName()), Bytes.toBytes(newServer))
+  # Wait till its up on new server before moving on
+  t = HTable.new(config, r.getTableDesc().getName())
+  begin
+    # Do a Get on first column family found to prove something exists
+    g = Get.new(r.getStartKey())
+    # Don't get whole row, just first family (Trying to get subset only)
+    g.addFamily(r.getTableDesc().getFamiliesKeys().iterator().next())
+    result = t.get(g)
+    raise IOError, "No result verifying new region location" unless result
+    # This doesn't work for region w/ startKey of ''
+    # raise IOError, "Result does not match row; startkey=%s, row=%s" % [Bytes.toString(r.getStartKey()), Bytes.toString(result.getRow())] unless Bytes.equals(r.getStartKey(), result.getRow())
+  ensure
+    # Close the table even when verification fails so it is not leaked
+    t.close()
+  end
+  meta = HTable.new(config, HConstants::META_TABLE_NAME)
+  begin
+    g = Get.new(r.getRegionName())
+    g.addColumn(HConstants::CATALOG_FAMILY, HConstants::SERVER_QUALIFIER)
+    result = meta.get(g)
+    value = result.getValue(HConstants::CATALOG_FAMILY, HConstants::SERVER_QUALIFIER)
+    nuServer = Bytes.toString(value)
+  ensure
+    meta.close()
+  end
+  # NOTE(review): presumably the catalog stores this value as host:port while
+  # decommission is host,port,startcode -- if so this can never match; confirm.
+  raise RuntimeError, "Old location and new still match: %s" % nuServer if nuServer == decommission
+end
+
+# Return list of servers minus the server to decommission.
+#
+# admin        - HBaseAdmin used to read the cluster status
+# decommission - servername to leave out of the returned list
+#
+# Returns an Array of the other servers' names.
+# Raises ArgumentError if +decommission+ is not among the cluster's servers.
+def getServersMinusServerToDecommission(admin, decommission)
+  # What servers are in the cluster
+  serverInfos = admin.getClusterStatus().getServerInfo()
+  # Make an array of servers minus the one to decommission
+  servers = []
+  # Must be a real boolean: 0 is truthy in Ruby, so a 0/1 flag would never
+  # trip the "not online" check below
+  found = false
+  serverInfos.each do |info|
+    name = info.getServerName()
+    if name == decommission
+      found = true
+    else
+      servers << name
+    end
+  end
+  # Check server to decommission is actually present
+  raise ArgumentError, "Server %s not online" % decommission unless found
+  servers
+end
+
+# Disable the DEBUG-level logging of the HBase client package.
+#
+# apacheLogger - logger used to report the change
+def configureLogging(apacheLogger)
+  # Configure log4j to not spew so much
+  logger = org.apache.log4j.Logger.getLogger("org.apache.hadoop.hbase.client")
+  logger.setLevel(org.apache.log4j.Level::INFO)
+  apacheLogger.info("Set logging level on client to INFO level")
+end
+
+# Get configuration instance, tuned for this script.
+#
+# Returns a Configuration from HBaseConfiguration.create() with the prefetch
+# limit, pause and retry count overridden.
+def getConfiguration()
+  config = HBaseConfiguration.create()
+  # No prefetching on .META.
+  config.setInt("hbase.client.prefetch.limit", 1)
+  # Make a config that retries at short intervals many times
+  config.setInt("hbase.client.pause", 500)
+  config.setInt("hbase.client.retries.number", 100)
+  config
+end
+
+# Get the regions currently online on a server.
+#
+# config       - client Configuration used to obtain the connection
+# decommission - servername: hostname, port and startcode, comma-delimited
+#
+# Returns whatever the region server reports from getOnlineRegions().
+# Raises ArgumentError if +decommission+ lacks a hostname or port.
+def getRegions(config, decommission)
+  # Break up passed servername into its parts.
+  host, port = decommission.split(',')
+  raise ArgumentError, "Bad servername %s; expected hostname,port,startcode" % decommission unless host && port
+  # Now get list of regions on this server
+  connection = HConnectionManager::getConnection(config)
+  # Make a HSA with hostname and port separated by ':'
+  hsa = HServerAddress.new(host + ":" + port)
+  # Get list of online regions
+  rs = connection.getHRegionConnection(hsa)
+  rs.getOnlineRegions()
+end
+
+# Check arguments
+usage if ARGV.size != 1
+
+# Server to decommission
+decommission = ARGV[0]
+
+# Set up logging.
+LOG = LogFactory.getLog(NAME)
+configureLogging(LOG)
+
+# Get configuration
+config = getConfiguration()
+
+# Get an admin instance
+admin = HBaseAdmin.new(config)
+
+# Start decommission
+servers = getServersMinusServerToDecommission(admin, decommission)
+# Without another server there is nowhere to move regions to
+raise RuntimeError, "No other servers online to take regions from %s" % decommission if servers.empty?
+regions = java.util.ArrayList.new()
+count = 0
+# Keep asking the server for its online regions until it reports none
+loop do
+  rs = getRegions(config, decommission)
+  break if rs.length == 0
+  # Ready to start unloading
+  rs.each do |r|
+    # Get a random server
+    server = servers[rand(servers.length)]
+    LOG.info("Moving #{count}: region=#{r.getEncodedName()} to server=#{server}")
+    count += 1
+    move(config, admin, r, server, decommission)
+    regions.add(r)
+  end
+end
+# Write out file of regions moved
+filename = "/tmp/" + decommission
+File.open(filename, 'w') do |f|
+  regions.each do |r|
+    f.puts r.getRegionNameAsString()
+  end
+end
+LOG.info("Wrote list of moved regions to " + filename)
diff --git a/bin/graceful_restart.sh b/bin/graceful_restart.sh
new file mode 100755
index 0000000..e3bdc98
--- /dev/null
+++ b/bin/graceful_restart.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+#
+#/**
+# * Copyright 2011 The Apache Software Foundation
+# *
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements. See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership. The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License.
You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+# Move regions off a server, then stop and start it. The balancer must already be off.
+bin=`dirname "$0"`
+bin=`cd "$bin">/dev/null; pwd`
+# Sets HBASE_HOME, HBASE_CONF_DIR, etc.; presumably also consumes a leading --config option.
+. "$bin"/hbase-config.sh
+if [ $# = 0 ]; then
+  echo "Usage: graceful_restart.sh [--config <conf-dir>] <servername>"
+  echo " servername Servername is hostname + port + startcode comma-delimited; e.g. example.org,60020,1212121212"
+  exit 1
+fi
+servername=$1
+echo "Make sure the balancer is disabled! This script does not do it for you!"
+# No 'exec' here: exec would replace this shell, so the restart below would never run.
+"$bin/hbase" org.jruby.Main "$bin"/decommission.rb "${servername}" || exit $?
+# If we got here, then zero regions on this server. Stop then restart it.
+# The commands below take their host list from a file, so dump the hostname into a temp file.
+hostname=`echo $servername|sed -e 's/\([^,]*\).*/\1/g'`
+hosts="/tmp/$(basename $0).$$.tmp"
+echo "$hostname" > "$hosts"
+base_remote_cmd="cd ${HBASE_HOME}; $bin/hbase-daemon.sh --config ${HBASE_CONF_DIR}"
+# Run stop, then start; the arguments are deliberately left unquoted so they split into words.
+"$bin/regionservers.sh" --hosts ${hosts} --config ${HBASE_CONF_DIR} $base_remote_cmd stop regionserver
+"$bin/regionservers.sh" --hosts ${hosts} --config ${HBASE_CONF_DIR} $base_remote_cmd start regionserver
+rm -f "$hosts"