diff --git bin/hbase-daemon.sh bin/hbase-daemon.sh
index f582ce7..a6f4fc3 100755
--- bin/hbase-daemon.sh
+++ bin/hbase-daemon.sh
@@ -32,6 +32,21 @@
 #
 # Modelled after $HADOOP_HOME/bin/hadoop-daemon.sh
 
+# Deletes the znode recorded in $HBASE_ZNODE_FILE, if that file still exists.
+# The server removes the file itself on a clean shutdown, so finding it once
+# the server process has exited means the server went away without cleaning up.
+# Globals: HBASE_ZNODE_FILE (read), bin (read), ZNODE (written)
+cleanZNode() {
+  if [ -f "$HBASE_ZNODE_FILE" ]; then
+    #call ZK to delete the node
+    ZNODE=$(cat "$HBASE_ZNODE_FILE")
+    echo "Region Server $HBASE_ZNODE_FILE didn't stop properly. Cleaning ZNode ($ZNODE) to trigger an immediate recovery."
+    "$bin"/hbase zkcli delete "$ZNODE" > /dev/null 2>&1
+    # The marker has served its purpose; drop it so it is not mistaken for a later crash.
+    rm -f "$HBASE_ZNODE_FILE"
+  fi
+}
+
 usage="Usage: hbase-daemon.sh [--config ]\
  (start|stop|restart) \
  "
@@ -96,7 +111,7 @@ fi
 mkdir -p "$HBASE_LOG_DIR"
 
 if [ "$HBASE_PID_DIR" = "" ]; then
-  HBASE_PID_DIR=/tmp
+  export HBASE_PID_DIR=/tmp
 fi
 
 if [ "$HBASE_IDENT_STRING" = "" ]; then
@@ -121,6 +136,7 @@ logout=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out
 loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc
 loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}"
 pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
+export HBASE_ZNODE_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.znode
 
 if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then
   export HBASE_GC_OPTS=" -Xloggc:${loggc}"
@@ -148,9 +164,16 @@ case $startStop in
     # Add to the command log file vital stats on our environment.
     echo "`date` Starting $command on `hostname`" >> $loglog
     echo "`ulimit -a`" >> $loglog 2>&1
-    nohup nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \
-        --config "${HBASE_CONF_DIR}" \
-        $command "$@" $startStop > "$logout" 2>&1 < /dev/null &
-    echo $! > $pid
+    # Run the server under a wrapper subshell that outlives it. The pid is
+    # recorded inside the wrapper so the pid file names the server itself and
+    # not the wrapper; the wrapper then waits and cleans up after a crash.
+    (
+      nohup nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \
+        --config "${HBASE_CONF_DIR}" \
+        $command "$@" $startStop > "$logout" 2>&1 < /dev/null &
+      echo $! > "$pid"
+      wait
+      cleanZNode
+    ) &
     sleep 1; head "$logout"
     ;;
diff --git src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 61a5988..d9137f7 100644
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -19,6 +19,9 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.StringWriter;
 import java.lang.Thread.UncaughtExceptionHandler;
@@ -754,6 +757,9 @@ public class HRegionServer extends RegionServer
     } catch (KeeperException e) {
       LOG.warn("Failed deleting my ephemeral node", e);
     }
+    // We may have failed to delete the znode at the previous step, but
+    // we delete the file anyway: a second attempt to delete the znode is likely to fail again.
+    deleteMyEphemeralNodeOnDisk();
     this.zooKeeper.close();
     LOG.info("stopping server " + this.serverNameFromMasterPOV +
       "; zookeeper connection closed.");
@@ -930,6 +936,9 @@ public class HRegionServer extends RegionServer
       // Set our ephemeral znode up in zookeeper now we have a name.
       createMyEphemeralNode();
 
+      // Save it in a file; a file left behind after we exit shows that we crashed.
+      writeMyEphemeralNodeOnDisk();
+
       // Master sent us hbase.rootdir to use. Should be fully qualified
       // path with file system specification included. Set 'fs.defaultFS'
       // to match the filesystem on hbase.rootdir else underlying hadoop hdfs
@@ -964,11 +973,67 @@ public class HRegionServer extends RegionServer
     return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
   }
 
+  /**
+   * @return the path of the file used to record this server's znode, read from
+   *         the HBASE_ZNODE_FILE environment variable; null if it is not set.
+   */
+  private String getMyEphemeralNodeFileName(){
+    return System.getenv().get("HBASE_ZNODE_FILE");
+  }
+
+
   private void createMyEphemeralNode() throws KeeperException {
     ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
       HConstants.EMPTY_BYTE_ARRAY);
   }
 
+  /**
+   * Writes the path of our ephemeral znode to the znode file, so that a file
+   * left behind after the process has gone shows that we did not stop cleanly.
+   * Only logs a warning and returns when no file name is configured.
+   * @throws IOException if the file cannot be opened or written
+   */
+  private void writeMyEphemeralNodeOnDisk() throws IOException {
+    String fileName = getMyEphemeralNodeFileName();
+
+    if (fileName==null){
+      LOG.warn("No filename given to save the znode used, it won't be saved "+
+        "(Environment variable HBASE_ZNODE_FILE is not set).");
+      return;
+    }
+
+    FileWriter fstream = new FileWriter(fileName);
+    BufferedWriter out = new BufferedWriter(fstream);
+    try {
+      out.write(getMyEphemeralNodePath()+"\n");
+    } finally {
+      try {
+        out.close();
+      } finally {
+        fstream.close();
+      }
+    }
+  }
+
+  /**
+   * Deletes the znode file, if one is configured. Called while stopping so the
+   * file does not outlive a server that shut down on its own.
+   */
+  private void deleteMyEphemeralNodeOnDisk(){
+    String fileName = getMyEphemeralNodeFileName();
+
+    if (fileName==null){
+      return;
+    }
+
+    File f = new File(fileName);
+    // A file that was never written is not an error; one we cannot remove is
+    // worth a warning, since a leftover file is read as a sign that we crashed.
+    if (!f.delete() && f.exists()) {
+      LOG.warn("Failed deleting the znode file " + fileName);
+    }
+  }
+
   private void deleteMyEphemeralNode() throws KeeperException {
     ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
   }