Index: src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java (revision 0) @@ -0,0 +1,96 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import junit.framework.Assert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ClockOutOfSyncException; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.Test; + + +public class TestClockSkewDetection { + private static final Log LOG = + LogFactory.getLog(TestClockSkewDetection.class); + + @Test + public void testClockSkewDetection() throws Exception { + final Configuration conf = HBaseConfiguration.create(); + ServerManager sm = new ServerManager(new Server() { + @Override + public CatalogTracker getCatalogTracker() { + return null; + } + + @Override + public Configuration getConfiguration() { + return conf; + } + + @Override + public String getServerName() { + return null; + } + + @Override + public ZooKeeperWatcher getZooKeeper() { + return null; + } + + @Override + public void abort(String why, Throwable e) {} + + @Override + public boolean isStopped() { + return false; + } + + @Override + public void stop(String why) { + }}, null, null); + + LOG.debug("regionServerStartup 1"); + HServerInfo hsi1 = new HServerInfo(new HServerAddress("example.org:1234"), + System.currentTimeMillis(), -1, "example.com"); + sm.regionServerStartup(hsi1, System.currentTimeMillis()); + + long maxSkew = 30000; + + try { + LOG.debug("regionServerStartup 2"); + HServerInfo hsi2 = new HServerInfo(new HServerAddress("example.org:1235"), + System.currentTimeMillis(), -1, "example.com"); + sm.regionServerStartup(hsi2, System.currentTimeMillis() - maxSkew * 2); + Assert.assertTrue("HMaster should have thrown an ClockOutOfSyncException " + + "but didn't.", false); + } catch(ClockOutOfSyncException e) { + //we want an exception + LOG.info("Recieved expected exception: "+e); + } + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/ClockOutOfSyncException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ClockOutOfSyncException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/ClockOutOfSyncException.java (revision 0) @@ -0,0 +1,33 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; + +/** + * This exception is thrown by the master when a region server clock skew is + * too high. + */ +@SuppressWarnings("serial") +public class ClockOutOfSyncException extends IOException { + public ClockOutOfSyncException(String message) { + super(message); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1033288) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -52,9 +52,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Chore; +import org.apache.hadoop.hbase.ClockOutOfSyncException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HConstants.OperationStatusCode; import org.apache.hadoop.hbase.HMsg; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; @@ -69,6 +69,7 @@ import org.apache.hadoop.hbase.UnknownRowLockException; import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.YouAreDeadException; +import org.apache.hadoop.hbase.HConstants.OperationStatusCode; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.RootLocationEditor; @@ -108,6 +109,7 @@ import org.apache.hadoop.hbase.replication.regionserver.Replication; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CompressionTest; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.InfoServer; import org.apache.hadoop.hbase.util.Pair; @@ -118,6 +120,7 @@ import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.DNS; import org.apache.zookeeper.KeeperException; @@ -1397,7 +1400,7 @@ * Let the master know we're here Run initialization using parameters passed * us by the master. */ - private MapWritable reportForDuty() { + private MapWritable reportForDuty() throws IOException { HServerAddress masterAddress = null; while (!stopped && (masterAddress = getMaster()) == null) { sleeper.sleep(); @@ -1415,8 +1418,19 @@ this.serverInfo.getServerAddress()); this.serverInfo.setLoad(buildServerLoad()); LOG.info("Telling master at " + masterAddress + " that we are up"); - result = this.hbaseMaster.regionServerStartup(this.serverInfo); + result = this.hbaseMaster.regionServerStartup(this.serverInfo, + EnvironmentEdgeManager.currentTimeMillis()); break; + } catch (RemoteException e) { + IOException ioe = e.unwrapRemoteException(); + if (ioe instanceof ClockOutOfSyncException) { + LOG.fatal("Master rejected startup because clock is out of sync", + ioe); + // Re-throw IOE will cause RS to abort + throw ioe; + } else { + LOG.warn("remote error telling master we are up", e); + } } catch (IOException e) { LOG.warn("error telling master we are up", e); } catch (KeeperException e) { Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1033288) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -31,6 +31,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ClockOutOfSyncException; import org.apache.hadoop.hbase.HMsg; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; @@ -48,7 +49,6 @@ import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; -import org.apache.hadoop.hbase.util.Threads; /** * The ServerManager class manages info about region servers - HServerInfo, @@ -84,13 +84,13 @@ private final Server master; private final MasterServices services; - private final LogCleaner logCleaner; - // Reporting to track master metrics. private final MasterMetrics metrics; final DeadServer deadservers = new DeadServer(); + private final long maxSkew; + /** * Constructor. * @param master @@ -105,20 +105,16 @@ this.services = services; this.metrics = metrics; Configuration c = master.getConfiguration(); - String n = Thread.currentThread().getName(); - this.logCleaner = - new LogCleaner(c.getInt("hbase.master.cleaner.interval", 60 * 1000), - master, c, this.services.getMasterFileSystem().getFileSystem(), - this.services.getMasterFileSystem().getOldLogDir()); - Threads.setDaemonThreadRunning(logCleaner, n + ".oldLogCleaner"); + maxSkew = c.getLong("hbase.master.maxclockskew", 30000); } /** * Let the server manager know a new regionserver has come online * @param serverInfo + * @param serverCurrentTime The current time of the region server in ms * @throws IOException */ - void regionServerStartup(final HServerInfo serverInfo) + void regionServerStartup(final HServerInfo serverInfo, long serverCurrentTime) throws IOException { // Test for case where we get a region startup message from a regionserver // that has been quickly restarted but whose znode expiration handler has @@ -130,6 +126,7 @@ HServerInfo info = new HServerInfo(serverInfo); checkIsDead(info.getServerName(), "STARTUP"); checkAlreadySameHostPort(info); + checkClockSkew(info, serverCurrentTime); recordNewServer(info, false, null); } @@ -168,6 +165,24 @@ } /** + * Checks if the clock skew between the server and the master. If the clock + * skew is too much it will throw an Exception. + * @throws ClockOutOfSyncException + */ + private void checkClockSkew(final HServerInfo serverInfo, + final long serverCurrentTime) + throws ClockOutOfSyncException { + long skew = System.currentTimeMillis() - serverCurrentTime; + if (skew > maxSkew) { + String message = "Server " + serverInfo.getServerName() + " has been " + + "rejected; Reported time is too far out of sync with master. " + + "Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms"; + LOG.warn(message); + throw new ClockOutOfSyncException(message); + } + } + + /** * If this server is on the dead list, reject it with a LeaseStillHeldException * @param serverName Server name formatted as host_port_startcode. * @param what START or REPORT @@ -651,11 +666,9 @@ } /** - * Stop the ServerManager. - *

- * Currently just interrupts the ServerMonitor and LogCleaner chores. + * Stop the ServerManager. Currently does nothing. */ public void stop() { - this.logCleaner.interrupt(); + } } Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1033288) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -165,6 +165,7 @@ private volatile boolean balanceSwitch = true; private Thread catalogJanitorChore; + private LogCleaner logCleaner; /** * Initializes the HMaster. The steps are as follows: @@ -516,6 +517,14 @@ this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, conf.getInt("hbase.master.executor.tableops.threads", 3)); + // Start log cleaner thread + String n = Thread.currentThread().getName(); + this.logCleaner = + new LogCleaner(conf.getInt("hbase.master.cleaner.interval", 60 * 1000), + this, conf, getMasterFileSystem().getFileSystem(), + getMasterFileSystem().getOldLogDir()); + Threads.setDaemonThreadRunning(logCleaner, n + ".oldLogCleaner"); + // Put up info server. int port = this.conf.getInt("hbase.master.info.port", 60010); if (port >= 0) { @@ -544,6 +553,7 @@ } if (this.rpcServer != null) this.rpcServer.stop(); // Clean up and close up shop + this.logCleaner.interrupt(); if (this.infoServer != null) { LOG.info("Stopping infoServer"); try { @@ -577,7 +587,9 @@ } } - public MapWritable regionServerStartup(final HServerInfo serverInfo) + @Override + public MapWritable regionServerStartup(final HServerInfo serverInfo, + final long serverCurrentTime) throws IOException { // Set the ip into the passed in serverInfo. Its ip is more than likely // not the ip that the master sees here. See at end of this method where @@ -589,7 +601,7 @@ serverInfo.setServerAddress(new HServerAddress(rsAddress, serverInfo.getServerAddress().getPort())); // Register with server manager - this.serverManager.regionServerStartup(serverInfo); + this.serverManager.regionServerStartup(serverInfo, serverCurrentTime); // Send back some config info MapWritable mw = createConfigurationSubset(); mw.put(new Text("hbase.regionserver.address"), new Text(rsAddress)); Index: src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java (revision 1033288) +++ src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java (working copy) @@ -78,7 +78,9 @@ *

  • Version 24: HBASE-2473, create table with regions.
  • *
  • Version 25: Added openRegion and Stoppable/Abortable to API.
  • *
  • Version 26: New master and Increment, 0.90 version bump.
  • + *
  • Version 27: HBASE-3168, Added serverCurrentTime to regionServerStartup + * in HMasterRegionInterface.
  • * */ - public static final long versionID = 26L; + public static final long versionID = 27L; } Index: src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java (revision 1033288) +++ src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java (working copy) @@ -40,11 +40,13 @@ /** * Called when a region server first starts * @param info server info + * @param serverCurrentTime The current time of the region server in ms * @throws IOException e * @return Configuration for the regionserver to use: e.g. filesystem, * hbase rootdir, etc. */ - public MapWritable regionServerStartup(HServerInfo info) throws IOException; + public MapWritable regionServerStartup(HServerInfo info, + long serverCurrentTime) throws IOException; /** * Called to renew lease, tell master what the region server is doing and to